The objectives are

Importing the necessary libraries

# A negative `warn` makes R ignore every warning for the rest of the session.
# NOTE(review): this also hides warnings from the analysis code itself, not
# just package start-up noise -- confirm that is intended.
options(warn = -1)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.1     v purrr   0.3.4
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x plotly::filter() masks dplyr::filter(), stats::filter()
## x dplyr::lag()     masks stats::lag()
library(htmlwidgets)
library(ggcorrplot)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(ggpubr)
library(readr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:ggpubr':
## 
##     gghistogram
library(fmsb)
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
require(maps)
## Loading required package: maps
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
require(viridis)
## Loading required package: viridis
## Loading required package: viridisLite
## 
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
## 
##     viridis_pal

Reading the Dataset

# Load the player table from the CSV file in the working directory.
dataset <- read.csv(file = "players_21.csv")
# Second binding to the freshly loaded data; presumably kept as an untouched
# reference while `dataset` is modified later -- TODO confirm.
copy_dataset <- dataset

Viewing the Dataset

# Preview the first six rows (the default for head()) across all columns.
head(dataset, n = 6L)
##   sofifa_id                                                         player_url
## 1    158023               https://sofifa.com/player/158023/lionel-messi/210002
## 2     20801 https://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/210002
## 3    200389                  https://sofifa.com/player/200389/jan-oblak/210002
## 4    188545         https://sofifa.com/player/188545/robert-lewandowski/210002
## 5    190871  https://sofifa.com/player/190871/neymar-da-silva-santos-jr/210002
## 6    192985            https://sofifa.com/player/192985/kevin-de-bruyne/210002
##          short_name                           long_name age        dob
## 1          L. Messi     Lionel Andrés Messi Cuccittini  33 1987-06-24
## 2 Cristiano Ronaldo Cristiano Ronaldo dos Santos Aveiro  35 1985-02-05
## 3          J. Oblak                           Jan Oblak  27 1993-01-07
## 4    R. Lewandowski                  Robert Lewandowski  31 1988-08-21
## 5         Neymar Jr      Neymar da Silva Santos Júnior  28 1992-02-05
## 6      K. De Bruyne                     Kevin De Bruyne  29 1991-06-28
##   height_cm weight_kg nationality           club_name            league_name
## 1       170        72   Argentina        FC Barcelona Spain Primera Division
## 2       187        83    Portugal            Juventus        Italian Serie A
## 3       188        87    Slovenia    Atlético Madrid Spain Primera Division
## 4       184        80      Poland  FC Bayern München   German 1. Bundesliga
## 5       175        68      Brazil Paris Saint-Germain         French Ligue 1
## 6       181        70     Belgium     Manchester City English Premier League
##   league_rank overall potential value_eur wage_eur player_positions
## 1           1      93        93  67500000   560000       RW, ST, CF
## 2           1      92        92  46000000   220000           ST, LW
## 3           1      91        93  75000000   125000               GK
## 4           1      91        91  80000000   240000               ST
## 5           1      91        91  90000000   270000          LW, CAM
## 6           1      91        91  87000000   370000          CAM, CM
##   preferred_foot international_reputation weak_foot skill_moves     work_rate
## 1           Left                        5         4           4    Medium/Low
## 2          Right                        5         4           5      High/Low
## 3          Right                        3         3           1 Medium/Medium
## 4          Right                        4         4           4   High/Medium
## 5          Right                        5         5           5   High/Medium
## 6          Right                        4         5           4     High/High
##              body_type real_face release_clause_eur
## 1                Messi       Yes          138400000
## 2           C. Ronaldo       Yes           75900000
## 3 PLAYER_BODY_TYPE_259       Yes          159400000
## 4 PLAYER_BODY_TYPE_276       Yes          132000000
## 5               Neymar       Yes          166500000
## 6 PLAYER_BODY_TYPE_321       Yes          161000000
##                                                                                                                player_tags
## 1                            #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 2                            #Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward
## 3                                                                                                                         
## 4                                                                                    #Distance Shooter, #Clinical Finisher
## 5 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 6                                        #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
##   team_position team_jersey_number loaned_from     joined contract_valid_until
## 1           CAM                 10             2004-07-01                 2021
## 2            LS                  7             2018-07-10                 2022
## 3            GK                 13             2014-07-16                 2023
## 4            ST                  9             2014-07-01                 2023
## 5            LW                 10             2017-08-03                 2022
## 6           RCM                 17             2015-08-30                 2023
##   nation_position nation_jersey_number pace shooting passing dribbling
## 1              RW                   10   85       92      91        95
## 2              LS                    7   89       93      81        89
## 3              GK                    1   NA       NA      NA        NA
## 4                                   NA   78       91      78        85
## 5                                   NA   91       85      86        94
## 6             RCM                    7   76       86      93        88
##   defending physic gk_diving gk_handling gk_kicking gk_reflexes gk_speed
## 1        38     65        NA          NA         NA          NA       NA
## 2        35     77        NA          NA         NA          NA       NA
## 3        NA     NA        87          92         78          90       52
## 4        43     82        NA          NA         NA          NA       NA
## 5        36     59        NA          NA         NA          NA       NA
## 6        64     78        NA          NA         NA          NA       NA
##   gk_positioning
## 1             NA
## 2             NA
## 3             90
## 4             NA
## 5             NA
## 6             NA
##                                                                                                                              player_traits
## 1 Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Team Player, Chip Shot (AI)
## 2                                                     Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot
## 3                                                                                                         GK Long Throw, Comes For Crosses
## 4                                                                            Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)
## 5                                                     Injury Prone, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 6                       Injury Prone, Leadership, Early Crosser, Long Passer (AI), Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot
##   attacking_crossing attacking_finishing attacking_heading_accuracy
## 1                 85                  95                         70
## 2                 84                  95                         90
## 3                 13                  11                         15
## 4                 71                  94                         85
## 5                 85                  87                         62
## 6                 94                  82                         55
##   attacking_short_passing attacking_volleys skill_dribbling skill_curve
## 1                      91                88              96          93
## 2                      82                86              88          81
## 3                      43                13              12          13
## 4                      84                89              85          79
## 5                      87                87              95          88
## 6                      94                82              88          85
##   skill_fk_accuracy skill_long_passing skill_ball_control movement_acceleration
## 1                94                 91                 96                    91
## 2                76                 77                 92                    87
## 3                14                 40                 30                    43
## 4                85                 70                 88                    77
## 5                89                 81                 95                    94
## 6                83                 93                 92                    77
##   movement_sprint_speed movement_agility movement_reactions movement_balance
## 1                    80               91                 94               95
## 2                    91               87                 95               71
## 3                    60               67                 88               49
## 4                    78               77                 93               82
## 5                    89               96                 91               83
## 6                    76               78                 91               76
##   power_shot_power power_jumping power_stamina power_strength power_long_shots
## 1               86            68            72             69               94
## 2               94            95            84             78               93
## 3               59            78            41             78               12
## 4               89            84            76             86               85
## 5               80            62            81             50               84
## 6               91            63            89             74               91
##   mentality_aggression mentality_interceptions mentality_positioning
## 1                   44                      40                    93
## 2                   63                      29                    95
## 3                   34                      19                    11
## 4                   81                      49                    94
## 5                   51                      36                    87
## 6                   76                      66                    88
##   mentality_vision mentality_penalties mentality_composure defending_marking
## 1               95                  75                  96                NA
## 2               82                  84                  95                NA
## 3               65                  11                  68                NA
## 4               79                  88                  88                NA
## 5               90                  92                  93                NA
## 6               94                  84                  91                NA
##   defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 1                        35                       24                  6
## 2                        32                       24                  7
## 3                        12                       18                 87
## 4                        42                       19                 15
## 5                        30                       29                  9
## 6                        65                       53                 15
##   goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 1                   11                  15                      14
## 2                   11                  15                      14
## 3                   92                  78                      90
## 4                    6                  12                       8
## 5                    9                  15                      15
## 6                   13                   5                      10
##   goalkeeping_reflexes   ls   st   rs   lw   lf   cf   rf   rw  lam  cam  ram
## 1                    8 89+3 89+3 89+3 92+0 93+0 93+0 93+0 92+0 93+0 93+0 93+0
## 2                   11 91+1 91+1 91+1 89+0 91+0 91+0 91+0 89+0 88+3 88+3 88+3
## 3                   90 33+3 33+3 33+3 32+0 35+0 35+0 35+0 32+0 38+3 38+3 38+3
## 4                   10 89+2 89+2 89+2 85+0 87+0 87+0 87+0 85+0 85+3 85+3 85+3
## 5                   11 84+3 84+3 84+3 90+0 89+0 89+0 89+0 90+0 90+1 90+1 90+1
## 6                   13 83+3 83+3 83+3 88+0 88+0 88+0 88+0 88+0 89+2 89+2 89+2
##     lm  lcm   cm  rcm   rm  lwb  ldm  cdm  rdm  rwb   lb  lcb   cb  rcb   rb
## 1 91+2 87+3 87+3 87+3 91+2 66+3 65+3 65+3 65+3 66+3 62+3 52+3 52+3 52+3 62+3
## 2 88+3 81+3 81+3 81+3 88+3 65+3 61+3 61+3 61+3 65+3 61+3 54+3 54+3 54+3 61+3
## 3 35+3 38+3 38+3 38+3 35+3 32+3 36+3 36+3 36+3 32+3 32+3 33+3 33+3 33+3 32+3
## 4 83+3 79+3 79+3 79+3 83+3 64+3 65+3 65+3 65+3 64+3 61+3 60+3 60+3 60+3 61+3
## 5 90+1 83+3 83+3 83+3 90+1 67+3 62+3 62+3 62+3 67+3 62+3 49+3 49+3 49+3 62+3
## 6 89+2 89+2 89+2 89+2 89+2 79+3 80+3 80+3 80+3 79+3 75+3 69+3 69+3 69+3 75+3

Structure of Dataset

# Compact overview: one line per column with its type and first few values.
str(object = dataset)
## 'data.frame':    18944 obs. of  106 variables:
##  $ sofifa_id                 : int  158023 20801 200389 188545 190871 192985 231747 192448 203376 212831 ...
##  $ player_url                : chr  "https://sofifa.com/player/158023/lionel-messi/210002" "https://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/210002" "https://sofifa.com/player/200389/jan-oblak/210002" "https://sofifa.com/player/188545/robert-lewandowski/210002" ...
##  $ short_name                : chr  "L. Messi" "Cristiano Ronaldo" "J. Oblak" "R. Lewandowski" ...
##  $ long_name                 : chr  "Lionel Andrés Messi Cuccittini" "Cristiano Ronaldo dos Santos Aveiro" "Jan Oblak" "Robert Lewandowski" ...
##  $ age                       : int  33 35 27 31 28 29 21 28 28 27 ...
##  $ dob                       : chr  "1987-06-24" "1985-02-05" "1993-01-07" "1988-08-21" ...
##  $ height_cm                 : int  170 187 188 184 175 181 178 187 193 191 ...
##  $ weight_kg                 : int  72 83 87 80 68 70 73 85 92 91 ...
##  $ nationality               : chr  "Argentina" "Portugal" "Slovenia" "Poland" ...
##  $ club_name                 : chr  "FC Barcelona" "Juventus" "Atlético Madrid" "FC Bayern München" ...
##  $ league_name               : chr  "Spain Primera Division" "Italian Serie A" "Spain Primera Division" "German 1. Bundesliga" ...
##  $ league_rank               : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ overall                   : int  93 92 91 91 91 91 90 90 90 90 ...
##  $ potential                 : int  93 92 93 91 91 91 95 93 91 91 ...
##  $ value_eur                 : int  67500000 46000000 75000000 80000000 90000000 87000000 105500000 69500000 75500000 62500000 ...
##  $ wage_eur                  : int  560000 220000 125000 240000 270000 370000 160000 260000 210000 160000 ...
##  $ player_positions          : chr  "RW, ST, CF" "ST, LW" "GK" "ST" ...
##  $ preferred_foot            : chr  "Left" "Right" "Right" "Right" ...
##  $ international_reputation  : int  5 5 3 4 5 4 3 3 3 3 ...
##  $ weak_foot                 : int  4 4 3 4 5 5 4 4 3 3 ...
##  $ skill_moves               : int  4 5 1 4 5 4 5 1 2 1 ...
##  $ work_rate                 : chr  "Medium/Low" "High/Low" "Medium/Medium" "High/Medium" ...
##  $ body_type                 : chr  "Messi" "C. Ronaldo" "PLAYER_BODY_TYPE_259" "PLAYER_BODY_TYPE_276" ...
##  $ real_face                 : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ release_clause_eur        : int  138400000 75900000 159400000 132000000 166500000 161000000 203100000 147700000 145300000 120300000 ...
##  $ player_tags               : chr  "#Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward" "#Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward" "" "#Distance Shooter, #Clinical Finisher" ...
##  $ team_position             : chr  "CAM" "LS" "GK" "ST" ...
##  $ team_jersey_number        : int  10 7 13 9 10 17 7 1 4 1 ...
##  $ loaned_from               : chr  "" "" "" "" ...
##  $ joined                    : chr  "2004-07-01" "2018-07-10" "2014-07-16" "2014-07-01" ...
##  $ contract_valid_until      : int  2021 2022 2023 2023 2022 2023 2022 2022 2023 2024 ...
##  $ nation_position           : chr  "RW" "LS" "GK" "" ...
##  $ nation_jersey_number      : int  10 7 1 NA NA 7 10 22 4 NA ...
##  $ pace                      : int  85 89 NA 78 91 76 96 NA 76 NA ...
##  $ shooting                  : int  92 93 NA 91 85 86 86 NA 60 NA ...
##  $ passing                   : int  91 81 NA 78 86 93 78 NA 71 NA ...
##  $ dribbling                 : int  95 89 NA 85 94 88 91 NA 71 NA ...
##  $ defending                 : int  38 35 NA 43 36 64 39 NA 91 NA ...
##  $ physic                    : int  65 77 NA 82 59 78 76 NA 86 NA ...
##  $ gk_diving                 : int  NA NA 87 NA NA NA NA 88 NA 86 ...
##  $ gk_handling               : int  NA NA 92 NA NA NA NA 85 NA 88 ...
##  $ gk_kicking                : int  NA NA 78 NA NA NA NA 88 NA 85 ...
##  $ gk_reflexes               : int  NA NA 90 NA NA NA NA 90 NA 89 ...
##  $ gk_speed                  : int  NA NA 52 NA NA NA NA 45 NA 51 ...
##  $ gk_positioning            : int  NA NA 90 NA NA NA NA 88 NA 91 ...
##  $ player_traits             : chr  "Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Te"| __truncated__ "Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot" "GK Long Throw, Comes For Crosses" "Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)" ...
##  $ attacking_crossing        : int  85 84 13 71 85 94 78 18 53 17 ...
##  $ attacking_finishing       : int  95 95 11 94 87 82 91 14 52 13 ...
##  $ attacking_heading_accuracy: int  70 90 15 85 62 55 73 11 87 19 ...
##  $ attacking_short_passing   : int  91 82 43 84 87 94 83 61 79 45 ...
##  $ attacking_volleys         : int  88 86 13 89 87 82 83 14 45 20 ...
##  $ skill_dribbling           : int  96 88 12 85 95 88 92 21 70 27 ...
##  $ skill_curve               : int  93 81 13 79 88 85 79 18 60 19 ...
##  $ skill_fk_accuracy         : int  94 76 14 85 89 83 63 12 70 18 ...
##  $ skill_long_passing        : int  91 77 40 70 81 93 70 63 86 44 ...
##  $ skill_ball_control        : int  96 92 30 88 95 92 90 30 77 30 ...
##  $ movement_acceleration     : int  91 87 43 77 94 77 96 38 72 56 ...
##  $ movement_sprint_speed     : int  80 91 60 78 89 76 96 50 79 47 ...
##  $ movement_agility          : int  91 87 67 77 96 78 92 37 61 40 ...
##  $ movement_reactions        : int  94 95 88 93 91 91 92 86 89 88 ...
##  $ movement_balance          : int  95 71 49 82 83 76 82 43 53 37 ...
##  $ power_shot_power          : int  86 94 59 89 80 91 86 66 81 64 ...
##  $ power_jumping             : int  68 95 78 84 62 63 77 79 90 52 ...
##  $ power_stamina             : int  72 84 41 76 81 89 86 35 75 32 ...
##  $ power_strength            : int  69 78 78 86 50 74 76 78 92 78 ...
##  $ power_long_shots          : int  94 93 12 85 84 91 79 10 64 14 ...
##  $ mentality_aggression      : int  44 63 34 81 51 76 62 43 83 27 ...
##  $ mentality_interceptions   : int  40 29 19 49 36 66 38 22 90 11 ...
##  $ mentality_positioning     : int  93 95 11 94 87 88 91 11 47 13 ...
##  $ mentality_vision          : int  95 82 65 79 90 94 80 70 65 66 ...
##  $ mentality_penalties       : int  75 84 11 88 92 84 70 25 62 23 ...
##  $ mentality_composure       : int  96 95 68 88 93 91 84 70 90 65 ...
##  $ defending_marking         : logi  NA NA NA NA NA NA ...
##  $ defending_standing_tackle : int  35 32 12 42 30 65 34 13 93 19 ...
##  $ defending_sliding_tackle  : int  24 24 18 19 29 53 32 10 86 16 ...
##  $ goalkeeping_diving        : int  6 7 87 15 9 15 13 88 13 86 ...
##  $ goalkeeping_handling      : int  11 11 92 6 9 13 5 85 10 88 ...
##  $ goalkeeping_kicking       : int  15 15 78 12 15 5 7 88 13 85 ...
##  $ goalkeeping_positioning   : int  14 14 90 8 15 10 11 88 11 91 ...
##  $ goalkeeping_reflexes      : int  8 11 90 10 11 13 6 90 11 89 ...
##  $ ls                        : chr  "89+3" "91+1" "33+3" "89+2" ...
##  $ st                        : chr  "89+3" "91+1" "33+3" "89+2" ...
##  $ rs                        : chr  "89+3" "91+1" "33+3" "89+2" ...
##  $ lw                        : chr  "92+0" "89+0" "32+0" "85+0" ...
##  $ lf                        : chr  "93+0" "91+0" "35+0" "87+0" ...
##  $ cf                        : chr  "93+0" "91+0" "35+0" "87+0" ...
##  $ rf                        : chr  "93+0" "91+0" "35+0" "87+0" ...
##  $ rw                        : chr  "92+0" "89+0" "32+0" "85+0" ...
##  $ lam                       : chr  "93+0" "88+3" "38+3" "85+3" ...
##  $ cam                       : chr  "93+0" "88+3" "38+3" "85+3" ...
##  $ ram                       : chr  "93+0" "88+3" "38+3" "85+3" ...
##  $ lm                        : chr  "91+2" "88+3" "35+3" "83+3" ...
##  $ lcm                       : chr  "87+3" "81+3" "38+3" "79+3" ...
##  $ cm                        : chr  "87+3" "81+3" "38+3" "79+3" ...
##  $ rcm                       : chr  "87+3" "81+3" "38+3" "79+3" ...
##  $ rm                        : chr  "91+2" "88+3" "35+3" "83+3" ...
##  $ lwb                       : chr  "66+3" "65+3" "32+3" "64+3" ...
##  $ ldm                       : chr  "65+3" "61+3" "36+3" "65+3" ...
##  $ cdm                       : chr  "65+3" "61+3" "36+3" "65+3" ...
##   [list output truncated]
# dim() returns c(rows, columns); keep it and print both as one labelled
# string. paste() already separates its pieces with a single space.
dimensions <- dim(dataset)
paste("Rows :", dimensions[[1]], " Columns :", dimensions[[2]])
## [1] "Rows : 18944  Columns : 106"

Checking for Missing Values (NA)

# Per-column summary. Numeric and logical columns get an "NA's" row when
# values are missing; character columns show only Length/Class/Mode, so
# empty strings (e.g. in `loaned_from`) are not counted as missing here.
summary(object = dataset)
##    sofifa_id       player_url         short_name         long_name        
##  Min.   :    41   Length:18944       Length:18944       Length:18944      
##  1st Qu.:210031   Class :character   Class :character   Class :character  
##  Median :232315   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :226242                                                           
##  3rd Qu.:246760                                                           
##  Max.   :258970                                                           
##                                                                           
##       age            dob              height_cm       weight_kg     
##  Min.   :16.00   Length:18944       Min.   :155.0   Min.   : 50.00  
##  1st Qu.:21.00   Class :character   1st Qu.:176.0   1st Qu.: 70.00  
##  Median :25.00   Mode  :character   Median :181.0   Median : 75.00  
##  Mean   :25.23                      Mean   :181.2   Mean   : 75.02  
##  3rd Qu.:29.00                      3rd Qu.:186.0   3rd Qu.: 80.00  
##  Max.   :53.00                      Max.   :206.0   Max.   :110.00  
##                                                                     
##  nationality         club_name         league_name         league_rank   
##  Length:18944       Length:18944       Length:18944       Min.   :1.000  
##  Class :character   Class :character   Class :character   1st Qu.:1.000  
##  Mode  :character   Mode  :character   Mode  :character   Median :1.000  
##                                                           Mean   :1.357  
##                                                           3rd Qu.:1.000  
##                                                           Max.   :4.000  
##                                                           NA's   :225    
##     overall        potential       value_eur            wage_eur     
##  Min.   :47.00   Min.   :47.00   Min.   :        0   Min.   :     0  
##  1st Qu.:61.00   1st Qu.:67.00   1st Qu.:   300000   1st Qu.:  1000  
##  Median :66.00   Median :71.00   Median :   650000   Median :  3000  
##  Mean   :65.68   Mean   :71.09   Mean   :  2224813   Mean   :  8676  
##  3rd Qu.:70.00   3rd Qu.:75.00   3rd Qu.:  1800000   3rd Qu.:  7000  
##  Max.   :93.00   Max.   :95.00   Max.   :105500000   Max.   :560000  
##                                                                      
##  player_positions   preferred_foot     international_reputation   weak_foot    
##  Length:18944       Length:18944       Min.   :1.000            Min.   :1.000  
##  Class :character   Class :character   1st Qu.:1.000            1st Qu.:3.000  
##  Mode  :character   Mode  :character   Median :1.000            Median :3.000  
##                                        Mean   :1.092            Mean   :2.937  
##                                        3rd Qu.:1.000            3rd Qu.:3.000  
##                                        Max.   :5.000            Max.   :5.000  
##                                                                                
##   skill_moves     work_rate          body_type          real_face        
##  Min.   :1.000   Length:18944       Length:18944       Length:18944      
##  1st Qu.:2.000   Class :character   Class :character   Class :character  
##  Median :2.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2.363                                                           
##  3rd Qu.:3.000                                                           
##  Max.   :5.000                                                           
##                                                                          
##  release_clause_eur  player_tags        team_position      team_jersey_number
##  Min.   :     9000   Length:18944       Length:18944       Min.   : 1.00     
##  1st Qu.:   525000   Class :character   Class :character   1st Qu.: 9.00     
##  Median :  1100000   Mode  :character   Mode  :character   Median :18.00     
##  Mean   :  4296353                                         Mean   :20.59     
##  3rd Qu.:  3200000                                         3rd Qu.:27.00     
##  Max.   :203100000                                         Max.   :99.00     
##  NA's   :995                                               NA's   :225       
##  loaned_from           joined          contract_valid_until nation_position   
##  Length:18944       Length:18944       Min.   :2020         Length:18944      
##  Class :character   Class :character   1st Qu.:2021         Class :character  
##  Mode  :character   Mode  :character   Median :2022         Mode  :character  
##                                        Mean   :2022                           
##                                        3rd Qu.:2023                           
##                                        Max.   :2028                           
##                                        NA's   :225                            
##  nation_jersey_number      pace          shooting        passing     
##  Min.   : 1.00        Min.   :25.00   Min.   :16.00   Min.   :25.00  
##  1st Qu.: 6.00        1st Qu.:62.00   1st Qu.:42.00   1st Qu.:50.00  
##  Median :12.00        Median :68.00   Median :54.00   Median :58.00  
##  Mean   :12.03        Mean   :67.67   Mean   :52.27   Mean   :57.14  
##  3rd Qu.:18.00        3rd Qu.:75.00   3rd Qu.:63.00   3rd Qu.:64.00  
##  Max.   :27.00        Max.   :96.00   Max.   :93.00   Max.   :93.00  
##  NA's   :17817        NA's   :2083    NA's   :2083    NA's   :2083   
##    dribbling       defending         physic        gk_diving    
##  Min.   :25.00   Min.   :15.00   Min.   :28.00   Min.   :45.00  
##  1st Qu.:57.00   1st Qu.:36.00   1st Qu.:58.00   1st Qu.:60.00  
##  Median :64.00   Median :56.00   Median :66.00   Median :65.00  
##  Mean   :62.46   Mean   :51.32   Mean   :64.46   Mean   :65.16  
##  3rd Qu.:69.00   3rd Qu.:64.00   3rd Qu.:72.00   3rd Qu.:70.00  
##  Max.   :95.00   Max.   :91.00   Max.   :91.00   Max.   :90.00  
##  NA's   :2083    NA's   :2083    NA's   :2083    NA's   :16861  
##   gk_handling      gk_kicking     gk_reflexes       gk_speed    
##  Min.   :43.00   Min.   :35.00   Min.   :44.00   Min.   :12.0   
##  1st Qu.:58.00   1st Qu.:57.00   1st Qu.:60.00   1st Qu.:28.0   
##  Median :63.00   Median :61.00   Median :66.00   Median :37.0   
##  Mean   :62.89   Mean   :61.72   Mean   :66.11   Mean   :37.2   
##  3rd Qu.:68.00   3rd Qu.:66.00   3rd Qu.:72.00   3rd Qu.:45.0   
##  Max.   :92.00   Max.   :93.00   Max.   :90.00   Max.   :65.0   
##  NA's   :16861   NA's   :16861   NA's   :16861   NA's   :16861  
##  gk_positioning  player_traits      attacking_crossing attacking_finishing
##  Min.   :38.00   Length:18944       Min.   : 6.00      Min.   : 3.0       
##  1st Qu.:57.00   Class :character   1st Qu.:38.00      1st Qu.:30.0       
##  Median :63.00   Mode  :character   Median :54.00      Median :49.0       
##  Mean   :63.17                      Mean   :49.61      Mean   :45.8       
##  3rd Qu.:69.00                      3rd Qu.:63.00      3rd Qu.:62.0       
##  Max.   :91.00                      Max.   :94.00      Max.   :95.0       
##  NA's   :16861                                                            
##  attacking_heading_accuracy attacking_short_passing attacking_volleys
##  Min.   : 5.00              Min.   : 7.00           Min.   : 3.00    
##  1st Qu.:44.00              1st Qu.:54.00           1st Qu.:30.00    
##  Median :55.00              Median :62.00           Median :44.00    
##  Mean   :51.87              Mean   :58.71           Mean   :42.67    
##  3rd Qu.:64.00              3rd Qu.:68.00           3rd Qu.:56.00    
##  Max.   :93.00              Max.   :94.00           Max.   :90.00    
##                                                                      
##  skill_dribbling  skill_curve    skill_fk_accuracy skill_long_passing
##  Min.   : 5.00   Min.   : 4.00   Min.   : 5.00     Min.   : 5.00     
##  1st Qu.:49.00   1st Qu.:35.00   1st Qu.:31.00     1st Qu.:43.00     
##  Median :61.00   Median :48.00   Median :41.00     Median :56.00     
##  Mean   :55.55   Mean   :47.19   Mean   :42.36     Mean   :52.65     
##  3rd Qu.:68.00   3rd Qu.:61.00   3rd Qu.:55.00     3rd Qu.:64.00     
##  Max.   :96.00   Max.   :94.00   Max.   :94.00     Max.   :93.00     
##                                                                      
##  skill_ball_control movement_acceleration movement_sprint_speed
##  Min.   : 5.00      Min.   :13.00         Min.   :12.00        
##  1st Qu.:54.00      1st Qu.:57.00         1st Qu.:57.00        
##  Median :63.00      Median :67.00         Median :67.00        
##  Mean   :58.48      Mean   :64.29         Mean   :64.33        
##  3rd Qu.:69.00      3rd Qu.:74.00         3rd Qu.:74.00        
##  Max.   :96.00      Max.   :97.00         Max.   :96.00        
##                                                                
##  movement_agility movement_reactions movement_balance power_shot_power
##  Min.   :14.00    Min.   :24.00      Min.   :12.00    Min.   :18.00   
##  1st Qu.:55.00    1st Qu.:56.00      1st Qu.:56.00    1st Qu.:48.00   
##  Median :66.00    Median :62.00      Median :66.00    Median :59.00   
##  Mean   :63.33    Mean   :61.61      Mean   :63.92    Mean   :57.75   
##  3rd Qu.:74.00    3rd Qu.:68.00      3rd Qu.:74.00    3rd Qu.:68.00   
##  Max.   :96.00    Max.   :95.00      Max.   :97.00    Max.   :95.00   
##                                                                       
##  power_jumping   power_stamina  power_strength  power_long_shots
##  Min.   :15.00   Min.   :12.0   Min.   :16.00   Min.   : 4.00   
##  1st Qu.:58.00   1st Qu.:55.0   1st Qu.:57.00   1st Qu.:32.00   
##  Median :65.00   Median :66.0   Median :66.00   Median :51.00   
##  Mean   :64.59   Mean   :62.6   Mean   :64.74   Mean   :46.76   
##  3rd Qu.:73.00   3rd Qu.:73.0   3rd Qu.:74.00   3rd Qu.:62.00   
##  Max.   :95.00   Max.   :97.0   Max.   :97.00   Max.   :94.00   
##                                                                 
##  mentality_aggression mentality_interceptions mentality_positioning
##  Min.   : 9.00        Min.   : 3.00           Min.   : 2.00        
##  1st Qu.:44.00        1st Qu.:25.00           1st Qu.:40.00        
##  Median :58.00        Median :52.00           Median :55.00        
##  Mean   :55.49        Mean   :46.25           Mean   :50.26        
##  3rd Qu.:68.00        3rd Qu.:64.00           3rd Qu.:64.00        
##  Max.   :96.00        Max.   :91.00           Max.   :95.00        
##                                                                    
##  mentality_vision mentality_penalties mentality_composure defending_marking
##  Min.   : 9.00    Min.   : 6.00       Min.   :12.00       Mode:logical     
##  1st Qu.:45.00    1st Qu.:38.75       1st Qu.:50.00       NA's:18944       
##  Median :55.00    Median :49.00       Median :59.00                        
##  Mean   :53.83    Mean   :48.05       Mean   :57.98                        
##  3rd Qu.:64.00    3rd Qu.:60.00       3rd Qu.:66.00                        
##  Max.   :95.00    Max.   :92.00       Max.   :96.00                        
##                                                                            
##  defending_standing_tackle defending_sliding_tackle goalkeeping_diving
##  Min.   : 5.00             Min.   : 4.00            Min.   : 1.00     
##  1st Qu.:27.00             1st Qu.:24.00            1st Qu.: 8.00     
##  Median :55.00             Median :52.00            Median :11.00     
##  Mean   :47.58             Mean   :45.55            Mean   :16.45     
##  3rd Qu.:65.00             3rd Qu.:63.00            3rd Qu.:14.00     
##  Max.   :93.00             Max.   :90.00            Max.   :90.00     
##                                                                       
##  goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
##  Min.   : 1.00        Min.   : 1.0        Min.   : 1.00          
##  1st Qu.: 8.00        1st Qu.: 8.0        1st Qu.: 8.00          
##  Median :11.00        Median :11.0        Median :11.00          
##  Mean   :16.24        Mean   :16.1        Mean   :16.23          
##  3rd Qu.:14.00        3rd Qu.:14.0        3rd Qu.:14.00          
##  Max.   :92.00        Max.   :93.0        Max.   :91.00          
##                                                                  
##  goalkeeping_reflexes      ls                 st                 rs           
##  Min.   : 1.00        Length:18944       Length:18944       Length:18944      
##  1st Qu.: 8.00        Class :character   Class :character   Class :character  
##  Median :11.00        Mode  :character   Mode  :character   Mode  :character  
##  Mean   :16.55                                                                
##  3rd Qu.:14.00                                                                
##  Max.   :90.00                                                                
##                                                                               
##       lw                 lf                 cf                 rf           
##  Length:18944       Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       rw                lam                cam                ram           
##  Length:18944       Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       lm                lcm                 cm                rcm           
##  Length:18944       Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       rm                lwb                ldm                cdm           
##  Length:18944       Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      rdm                rwb                 lb                lcb           
##  Length:18944       Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       cb                rcb                 rb           
##  Length:18944       Length:18944       Length:18944      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
## 

Dropping Columns

# Remove identifier/cosmetic columns that are not used in the analysis,
# then keep an untouched copy of the reduced data set.
dataset <- dataset[
  ,
  !(names(dataset) %in% c(
    "player_url", "dob", "body_type", "real_face",
    "team_jersey_number", "nation_jersey_number"
  )),
  drop = FALSE
]
copy_dataset <- dataset

Exploratory Analysis

# Histogram of player ages (one bin per year); the dashed red line marks the
# mean age.
ggplot(dataset, aes(x = age)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#0066CC") +
  geom_vline(
    aes(xintercept = mean(age)),
    color = "#CC0000", linetype = "dashed", size = 1
  ) +
  labs(title = "Age Distribution", x = "Age Groups", y = "Frequency")

# Box plot of the same variable to show spread and outliers.
ggplot(dataset, aes(x = age)) +
  geom_boxplot()


From the histogram above, the age distribution is approximately normal: most players are clustered around the middle of the range, with few very young and few very old players. The boxplot shows that the outliers lie mainly among the older players rather than the youngest ones. The majority of the footballers are between 25 and 30 years old.


# Histogram of overall ratings (one bin per rating point); the dashed red line
# marks the mean overall rating.
ggplot(dataset, aes(x = overall)) +
  geom_histogram(binwidth = 1, color = "black", fill = "#D7BDE2") +
  geom_vline(
    aes(xintercept = mean(overall)),
    color = "#CC0000", linetype = "dashed", size = 1
  ) +
  labs(title = "Overall Rating Distribution", x = "Rating", y = "Frequency")

# Box plot of the same variable to show spread and outliers.
ggplot(dataset, aes(x = overall)) +
  geom_boxplot()


From the histogram above, we can see that the overall rating also follows an approximately normal distribution. The boxplot shows a similar number of outliers in both tails, indicating the presence of a few exceptionally good players and a few players with very low overall ratings. The typical overall rating lies between 65 and 75.


# Radar charts for the ten top-ranked outfield players.
# Rows and columns are selected by hard-coded position: column 2 supplies the
# player label (short_name) and columns 28:33 the six plotted attributes.
# NOTE(review): the skipped rows are presumably the goalkeepers - confirm
# against the data, since these indices break silently if the row or column
# order of `dataset` changes.
top_10_players <- dataset[c(1, 2, 4:7, 9, 11:12, 14), c(2, 28:33)]
rownames(top_10_players) <- top_10_players$short_name
top_10_players <- subset(top_10_players, select = -c(short_name))
colors <- c(
  "#00AFBB", "#E7B800", "#FC4E07", "#6C3483", "#A93226",
  "#EAF2F8", "#0B5345", "#2471A3", "#2ECC71", "#C0392B"
)
titles <- rownames(top_10_players)
source("fun1.R") # expected to define create_beautiful_radarchart()

# Axis limits placed as the first two rows ("Max", "Min") of the plotted data.
# NOTE(review): this layout is what fmsb::radarchart expects; presumably the
# helper wraps it - confirm in fun1.R.
max_min <- data.frame(
  pace = c(100, 30), shooting = c(100, 30), passing = c(100, 30),
  dribbling = c(100, 30), defending = c(100, 30), physic = c(100, 30)
)
rownames(max_min) <- c("Max", "Min")

colnames(max_min) <- colnames(top_10_players)
df <- rbind(max_min, top_10_players)

# Set margins and panel layout in ONE par() call so that `op` records both old
# values; the original saved only `mar`, so par(op) left mfrow at 2 x 3.
op <- par(mar = c(1, 1, 1, 1), mfrow = c(2, 3))

for (i in seq_len(nrow(top_10_players))) {
  create_beautiful_radarchart(
    data = df[c(1, 2, i + 2), ],
    caxislabels = c(30, 47, 65, 82, 100),
    color = colors[i],
    title = titles[i]
  )
}

par(op)


The graphs above show the individual attribute profiles of the top 10 players, excluding goalkeepers. They show how a player's strengths vary depending on position.


Which Clubs are economical?

This question aims to find out which clubs are most likely to spend on high-quality players and to be active in the transfer market.

# Keep only players attached to a club, then count players per club.
club_dataset <- dataset[!(dataset$club_name == ""), ]
# table() -> data.frame yields the columns Var1 (club) and Freq (player count)
no_Players <- data.frame(table(club_dataset$club_name))
head(no_Players, n = 6)
##                    Var1 Freq
## 1 1. FC Heidenheim 1846   30
## 2           1. FC Köln   30
## 3  1. FC Kaiserslautern   28
## 4       1. FC Magdeburg   27
## 5       1. FC Nürnberg   30
## 6    1. FC Saarbrücken   26
# Five-number summary (plus mean) of squad sizes
no_Players_summary <- summary(no_Players[["Freq"]])
no_Players_summary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   26.00   28.00   27.49   30.00   33.00
# Report the mean squad size (4th element of the summary)
paste("Each Team has an average of", no_Players_summary[4])
## [1] "Each Team has an average of 27.4875183553598"
# Box plot of the number of players per club
ggplot(no_Players, aes(x = Freq)) +
  geom_boxplot()


Calculating the economic ratio for each player

Here we calculate the economic ratio for each player as wage / potential, i.e. how much wage a player earns per point of potential.

# Wage per point of potential for every player, stored rounded to 2 decimals
economical_ratio <- with(club_dataset, wage_eur / potential)
club_dataset$economic_ratio <- round(economical_ratio, digits = 2)
head(
  club_dataset[, c("short_name", "wage_eur", "potential", "economic_ratio")]
)
##          short_name wage_eur potential economic_ratio
## 1          L. Messi   560000        93        6021.51
## 2 Cristiano Ronaldo   220000        92        2391.30
## 3          J. Oblak   125000        93        1344.09
## 4    R. Lewandowski   240000        91        2637.36
## 5         Neymar Jr   270000        91        2967.03
## 6      K. De Bruyne   370000        91        4065.93

Grouping the economic ratio for each club

# Mean economic ratio per club, sorted from highest to lowest.
# Aggregating one-column data frames carries the column names through, so no
# renaming step is needed.
club_economical <- aggregate(
  club_dataset["economic_ratio"],
  by = club_dataset["club_name"],
  FUN = mean
)
club_economical <- club_economical[
  with(club_economical, order(-economic_ratio)),
]
club_economical$economic_ratio <- round(club_economical$economic_ratio, digits = 2)
head(club_economical)
##             club_name economic_ratio
## 492       Real Madrid        1753.59
## 223      FC Barcelona        1661.11
## 390   Manchester City        1323.29
## 378         Liverpool        1111.06
## 391 Manchester United        1071.96
## 335             Inter        1015.58

Top 10 Economical Clubs, based on likelihood to spend

# First ten rows of the ranking, i.e. the ten highest mean economic ratios
top_10_club_economical <- club_economical[seq_len(10), ]
top_10_club_economical
##               club_name economic_ratio
## 492         Real Madrid        1753.59
## 223        FC Barcelona        1661.11
## 390     Manchester City        1323.29
## 378           Liverpool        1111.06
## 391   Manchester United        1071.96
## 335               Inter        1015.58
## 137             Chelsea         978.01
## 225  FC Bayern München         943.96
## 615   Tottenham Hotspur         917.44
## 453 Paris Saint-Germain         863.12
# Names of the ten top-ranked clubs, as a character vector
top_10_clubs <- top_10_club_economical[["club_name"]]
top_10_clubs
##  [1] "Real Madrid"         "FC Barcelona"        "Manchester City"    
##  [4] "Liverpool"           "Manchester United"   "Inter"              
##  [7] "Chelsea"             "FC Bayern München"  "Tottenham Hotspur"  
## [10] "Paris Saint-Germain"

# Bar chart of the mean economic ratio of the ten top-ranked clubs, rendered as
# an interactive plotly widget
p <- ggplot(top_10_club_economical, aes(x = club_name, y = economic_ratio)) +
  geom_col(width = 0.8, fill = "#0066CC") +
  labs(title = "Clubs with the Top Economic Ratio") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p)

From the bar plot above, we can see the economic ratio of the top 10 clubs (ranked by economic ratio). Real Madrid is the club most likely to spend on high-quality players, followed by FC Barcelona and Manchester City. A limitation of this figure, however, is that it includes the economic ratio of players who were bought long ago.


Top 10 economical clubs in recent years (last 5 years)

# Replace the vector of club names with the full player rows of those clubs
top_10_clubs <- dplyr::filter(dataset, club_name %in% top_10_clubs)
head(top_10_clubs, n = 6)
##   sofifa_id     short_name                       long_name age height_cm
## 1    158023       L. Messi Lionel Andrés Messi Cuccittini  33       170
## 2    188545 R. Lewandowski              Robert Lewandowski  31       184
## 3    190871      Neymar Jr  Neymar da Silva Santos Júnior  28       175
## 4    192985   K. De Bruyne                 Kevin De Bruyne  29       181
## 5    231747     K. Mbappé           Kylian Mbappé Lottin  21       178
## 6    192448  M. ter Stegen          Marc-André ter Stegen  28       187
##   weight_kg nationality           club_name            league_name league_rank
## 1        72   Argentina        FC Barcelona Spain Primera Division           1
## 2        80      Poland  FC Bayern München   German 1. Bundesliga           1
## 3        68      Brazil Paris Saint-Germain         French Ligue 1           1
## 4        70     Belgium     Manchester City English Premier League           1
## 5        73      France Paris Saint-Germain         French Ligue 1           1
## 6        85     Germany        FC Barcelona Spain Primera Division           1
##   overall potential value_eur wage_eur player_positions preferred_foot
## 1      93        93  67500000   560000       RW, ST, CF           Left
## 2      91        91  80000000   240000               ST          Right
## 3      91        91  90000000   270000          LW, CAM          Right
## 4      91        91  87000000   370000          CAM, CM          Right
## 5      90        95 105500000   160000       ST, LW, RW          Right
## 6      90        93  69500000   260000               GK          Right
##   international_reputation weak_foot skill_moves     work_rate
## 1                        5         4           4    Medium/Low
## 2                        4         4           4   High/Medium
## 3                        5         5           5   High/Medium
## 4                        4         5           4     High/High
## 5                        3         4           5      High/Low
## 6                        3         4           1 Medium/Medium
##   release_clause_eur
## 1          138400000
## 2          132000000
## 3          166500000
## 4          161000000
## 5          203100000
## 6          147700000
##                                                                                                                player_tags
## 1                            #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 2                                                                                    #Distance Shooter, #Clinical Finisher
## 3 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 4                                        #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
## 5                                                                                          #Speedster, #Dribbler, #Acrobat
## 6                                                                                                                         
##   team_position loaned_from     joined contract_valid_until nation_position
## 1           CAM             2004-07-01                 2021              RW
## 2            ST             2014-07-01                 2023                
## 3            LW             2017-08-03                 2022                
## 4           RCM             2015-08-30                 2023             RCM
## 5            LS             2018-07-01                 2022              RM
## 6            GK             2014-07-01                 2022             SUB
##   pace shooting passing dribbling defending physic gk_diving gk_handling
## 1   85       92      91        95        38     65        NA          NA
## 2   78       91      78        85        43     82        NA          NA
## 3   91       85      86        94        36     59        NA          NA
## 4   76       86      93        88        64     78        NA          NA
## 5   96       86      78        91        39     76        NA          NA
## 6   NA       NA      NA        NA        NA     NA        88          85
##   gk_kicking gk_reflexes gk_speed gk_positioning
## 1         NA          NA       NA             NA
## 2         NA          NA       NA             NA
## 3         NA          NA       NA             NA
## 4         NA          NA       NA             NA
## 5         NA          NA       NA             NA
## 6         88          90       45             88
##                                                                                                                              player_traits
## 1 Finesse Shot, Long Shot Taker (AI), Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Team Player, Chip Shot (AI)
## 2                                                                            Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI)
## 3                                                     Injury Prone, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 4                       Injury Prone, Leadership, Early Crosser, Long Passer (AI), Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot
## 5                                                     Finesse Shot, Flair, Speed Dribbler (AI), Outside Foot Shot, Technical Dribbler (AI)
## 6                                                                                   Rushes Out Of Goal, Comes For Crosses, Saves with Feet
##   attacking_crossing attacking_finishing attacking_heading_accuracy
## 1                 85                  95                         70
## 2                 71                  94                         85
## 3                 85                  87                         62
## 4                 94                  82                         55
## 5                 78                  91                         73
## 6                 18                  14                         11
##   attacking_short_passing attacking_volleys skill_dribbling skill_curve
## 1                      91                88              96          93
## 2                      84                89              85          79
## 3                      87                87              95          88
## 4                      94                82              88          85
## 5                      83                83              92          79
## 6                      61                14              21          18
##   skill_fk_accuracy skill_long_passing skill_ball_control movement_acceleration
## 1                94                 91                 96                    91
## 2                85                 70                 88                    77
## 3                89                 81                 95                    94
## 4                83                 93                 92                    77
## 5                63                 70                 90                    96
## 6                12                 63                 30                    38
##   movement_sprint_speed movement_agility movement_reactions movement_balance
## 1                    80               91                 94               95
## 2                    78               77                 93               82
## 3                    89               96                 91               83
## 4                    76               78                 91               76
## 5                    96               92                 92               82
## 6                    50               37                 86               43
##   power_shot_power power_jumping power_stamina power_strength power_long_shots
## 1               86            68            72             69               94
## 2               89            84            76             86               85
## 3               80            62            81             50               84
## 4               91            63            89             74               91
## 5               86            77            86             76               79
## 6               66            79            35             78               10
##   mentality_aggression mentality_interceptions mentality_positioning
## 1                   44                      40                    93
## 2                   81                      49                    94
## 3                   51                      36                    87
## 4                   76                      66                    88
## 5                   62                      38                    91
## 6                   43                      22                    11
##   mentality_vision mentality_penalties mentality_composure defending_marking
## 1               95                  75                  96                NA
## 2               79                  88                  88                NA
## 3               90                  92                  93                NA
## 4               94                  84                  91                NA
## 5               80                  70                  84                NA
## 6               70                  25                  70                NA
##   defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 1                        35                       24                  6
## 2                        42                       19                 15
## 3                        30                       29                  9
## 4                        65                       53                 15
## 5                        34                       32                 13
## 6                        13                       10                 88
##   goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 1                   11                  15                      14
## 2                    6                  12                       8
## 3                    9                  15                      15
## 4                   13                   5                      10
## 5                    5                   7                      11
## 6                   85                  88                      88
##   goalkeeping_reflexes   ls   st   rs   lw   lf   cf   rf   rw  lam  cam  ram
## 1                    8 89+3 89+3 89+3 92+0 93+0 93+0 93+0 92+0 93+0 93+0 93+0
## 2                   10 89+2 89+2 89+2 85+0 87+0 87+0 87+0 85+0 85+3 85+3 85+3
## 3                   11 84+3 84+3 84+3 90+0 89+0 89+0 89+0 90+0 90+1 90+1 90+1
## 4                   13 83+3 83+3 83+3 88+0 88+0 88+0 88+0 88+0 89+2 89+2 89+2
## 5                    6 88+3 88+3 88+3 89+0 89+0 89+0 89+0 89+0 87+3 87+3 87+3
## 6                   90 35+3 35+3 35+3 34+0 38+0 38+0 38+0 34+0 42+3 42+3 42+3
##     lm  lcm   cm  rcm   rm  lwb  ldm  cdm  rdm  rwb   lb  lcb   cb  rcb   rb
## 1 91+2 87+3 87+3 87+3 91+2 66+3 65+3 65+3 65+3 66+3 62+3 52+3 52+3 52+3 62+3
## 2 83+3 79+3 79+3 79+3 83+3 64+3 65+3 65+3 65+3 64+3 61+3 60+3 60+3 60+3 61+3
## 3 90+1 83+3 83+3 83+3 90+1 67+3 62+3 62+3 62+3 67+3 62+3 49+3 49+3 49+3 62+3
## 4 89+2 89+2 89+2 89+2 89+2 79+3 80+3 80+3 80+3 79+3 75+3 69+3 69+3 69+3 75+3
## 5 87+3 79+3 79+3 79+3 87+3 67+3 63+3 63+3 63+3 67+3 63+3 55+3 55+3 55+3 63+3
## 6 39+3 45+3 45+3 45+3 39+3 33+3 41+3 41+3 41+3 33+3 31+3 33+3 33+3 33+3 31+3

# Keep only the identification, rating, financial and signing-date columns.
# Selecting by name replaces the earlier positional drop (`-c(14:87)`), which
# stopped one column short and left a stray `rb` column in the result.
kept_columns <- c(
  "sofifa_id", "short_name", "long_name", "age", "club_name", "league_name",
  "league_rank", "overall", "potential", "value_eur", "wage_eur",
  "release_clause_eur", "joined"
)
top_10_clubs <- top_10_clubs[, kept_columns]
#View(top_10_clubs)
copy_top_10_clubs <- top_10_clubs
# Reduce the signing date to its four-digit year; format() already returns
# character, so no further conversion is needed
top_10_clubs$joined <- format(
  as.Date(top_10_clubs$joined, format = "%Y-%m-%d"),
  "%Y"
)
head(top_10_clubs)
##   sofifa_id     short_name                       long_name age
## 1    158023       L. Messi Lionel Andrés Messi Cuccittini  33
## 2    188545 R. Lewandowski              Robert Lewandowski  31
## 3    190871      Neymar Jr  Neymar da Silva Santos Júnior  28
## 4    192985   K. De Bruyne                 Kevin De Bruyne  29
## 5    231747     K. Mbappé           Kylian Mbappé Lottin  21
## 6    192448  M. ter Stegen          Marc-André ter Stegen  28
##             club_name            league_name league_rank overall potential
## 1        FC Barcelona Spain Primera Division           1      93        93
## 2  FC Bayern München   German 1. Bundesliga           1      91        91
## 3 Paris Saint-Germain         French Ligue 1           1      91        91
## 4     Manchester City English Premier League           1      91        91
## 5 Paris Saint-Germain         French Ligue 1           1      90        95
## 6        FC Barcelona Spain Primera Division           1      90        93
##   value_eur wage_eur release_clause_eur joined   rb
## 1  67500000   560000          138400000   2004 62+3
## 2  80000000   240000          132000000   2014 61+3
## 3  90000000   270000          166500000   2017 62+3
## 4  87000000   370000          161000000   2015 75+3
## 5 105500000   160000          203100000   2018 63+3
## 6  69500000   260000          147700000   2014 31+3
# Last six rows of the trimmed data
tail(top_10_clubs, n = 6)
##     sofifa_id        short_name                long_name age         club_name
## 309    256831         D. Cirkin            Dennis Cirkin  18 Tottenham Hotspur
## 310    252793 T. Harwood-Bellis    Taylor Harwood-Bellis  18   Manchester City
## 311    252794  Adrián Bernabé Adrián Bernabé García  19   Manchester City
## 312    240913       C. Kelleher        Caoimhin Kelleher  21         Liverpool
## 313    254120          T. Doyle             Thomas Doyle  18   Manchester City
## 314    245903          H. White             Harvey White  18 Tottenham Hotspur
##                league_name league_rank overall potential value_eur wage_eur
## 309 English Premier League           1      61        83    525000     3000
## 310 English Premier League           1      61        82    525000     4000
## 311 English Premier League           1      61        77    550000     9000
## 312 English Premier League           1      61        75    475000     6000
## 313 English Premier League           1      60        82    475000     5000
## 314 English Premier League           1      60        80    400000     3000
##     release_clause_eur joined   rb
## 309            1500000   2019 59+2
## 310            1500000   2019 59+2
## 311            1400000   2019 49+2
## 312            1200000   2017 21+2
## 313            1400000   2019 55+2
## 314            1200000   2018 60+2

# Players of the top-10 clubs whose signing year is 2015 through 2020
transfer_expenditure <- dplyr::filter(
  top_10_clubs,
  joined %in% as.character(2015:2020)
)
#View(transfer_expenditure)

# Wage per point of potential for each of those players, rounded to 2 decimals
teconomical_ratio <- with(transfer_expenditure, wage_eur / potential)
transfer_expenditure$economic_ratio <- round(teconomical_ratio, digits = 2)
#View(transfer_expenditure)

# Mean ratio per club, sorted from highest to lowest. Aggregating one-column
# data frames carries the column names through, so no renaming is needed.
Transfer_club_economical <- aggregate(
  transfer_expenditure["economic_ratio"],
  by = transfer_expenditure["club_name"],
  FUN = mean
)
Transfer_club_economical <- Transfer_club_economical[
  with(Transfer_club_economical, order(-economic_ratio)),
]
Transfer_club_economical$economic_ratio <- round(
  Transfer_club_economical$economic_ratio,
  digits = 2
)
Transfer_club_economical
##              club_name economic_ratio
## 6      Manchester City        1309.17
## 9          Real Madrid        1283.66
## 2         FC Barcelona        1241.91
## 5            Liverpool        1109.28
## 4                Inter        1039.29
## 7    Manchester United         999.12
## 1              Chelsea         974.57
## 10   Tottenham Hotspur         819.82
## 8  Paris Saint-Germain         813.44
## 3   FC Bayern München         751.59

# Bar chart of the mean economic ratio per club.
# ggplot2 draws a character x-axis in alphabetical order, which would discard
# the descending ranking of the table. The club names are therefore turned
# into a factor whose levels follow the ratio (highest first). A copy is used
# so that Transfer_club_economical itself is left untouched.
club_ranking <- Transfer_club_economical
club_ranking$club_name <- factor(
  club_ranking$club_name,
  levels = club_ranking$club_name[order(-club_ranking$economic_ratio)]
)
p <- ggplot(club_ranking, aes(x = club_name, y = economic_ratio)) +
  geom_bar(stat = "identity", width = 0.8, fill = "#0066CC") +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Clubs with the Top Economic Ratio in last 5 years")
ggplotly(p)

From the newly plotted economic-ratio bar plot, we can see that Manchester City, and not Real Madrid, are the most economical club in recent years.


Spending view of the top 3 active clubs in recent years

# Per-player economic ratios for Manchester City, Real Madrid and FC Barcelona,
# drawn as bars positioned by joining year and coloured by club.
# NOTE(review): every row is drawn as its own bar; rows sharing a club and
# year presumably overlap rather than add up - confirm this is intended.
man_city <- filter(
  transfer_expenditure,
  club_name %in% c("Manchester City", "Real Madrid", "FC Barcelona")
)
ggplot(man_city, aes(x = joined, y = economic_ratio, fill = club_name)) +
  geom_col(position = "dodge")


This plot gives us a better view and shows that Manchester City's spending has been declining in recent years, whereas Real Madrid's spending has gone up and down: it was low in 2017, followed by heavy spending in 2018 and 2019. FC Barcelona have spent consistently in all years and are more likely to be active in the market.


# Pair each player's current ratings (dataset) with the ratings stored in
# players_17.csv, matching rows on long_name.
# NOTE(review): the join key is the player's name; if names are not unique,
# merge() will pair every combination of the duplicates - confirm.
rating_cols <- c("long_name", "potential", "overall")
dataset2 <- read.csv("players_17.csv")
dataset2 <- dataset2[, rating_cols]
dataset1 <- dataset[, rating_cols]
newdataset <- merge(dataset1, dataset2, by = "long_name")

# ".x" columns come from dataset, ".y" columns from players_17.csv.
newdataset[["overalldiff"]] <- newdataset[["overall.x"]] - newdataset[["overall.y"]]
newdataset[["potentialdiff"]] <- newdataset[["potential.x"]] - newdataset[["potential.y"]]
newdataset[["overall_goal"]] <- newdataset[["overall.x"]] - newdataset[["potential.y"]]
head(newdataset)
##                   long_name potential.x overall.x potential.y overall.y
## 1     Ögmundur Kristinsson          64        64          65        63
## 2        Ömer Ali Şahiner          74        74          78        74
## 3              Ömer Bayram          72        72          70        68
## 4 Ömer Hasan Şişmanoğlu          69        69          75        74
## 5              Ömer Toprak          76        76          85        84
## 6             喜田 拓也          73        69          74        66
##   overalldiff potentialdiff overall_goal
## 1           1            -1           -1
## 2           0            -4           -4
## 3           4             2            2
## 4          -5            -6           -6
## 5          -8            -9           -9
## 6           3            -1           -5

# Keep only the name and the three difference columns, then give the
# differences their final names (columns 2 to 4, in that order).
raw_rating_cols <- c("potential.x", "overall.x", "potential.y", "overall.y")
newdataset <- newdataset[, !(names(newdataset) %in% raw_rating_cols)]
names(newdataset)[2:4] <- c("improv_overall", "improv_potential", "target_overall")
head(newdataset)
##                   long_name improv_overall improv_potential target_overall
## 1     Ögmundur Kristinsson              1               -1             -1
## 2        Ömer Ali Şahiner              0               -4             -4
## 3              Ömer Bayram              4                2              2
## 4 Ömer Hasan Şişmanoğlu             -5               -6             -6
## 5              Ömer Toprak             -8               -9             -9
## 6             喜田 拓也              3               -1             -5
# Attach the improvement columns to the main table (matched on long_name),
# then discard the name column that served as the join key.
dataset <- merge(dataset, newdataset, by = "long_name")
dataset[["long_name"]] <- NULL

In the table above, the three new columns improv_overall, improv_potential and target_overall are, respectively, the improvement in each player's overall rating and in their potential over the last 5 years, and the difference between the overall rating now and the potential 5 years ago.


# Sort players by overall rating, highest first.
dataset <- dataset[order(-dataset[["overall"]]), ]

# Remove descriptive columns that are not used from here on.
unused_cols <- c(
  "height_cm", "weight_kg", "preferred_foot", "international_reputation",
  "weak_foot", "skill_moves", "loaned_from", "joined", "player_traits"
)
dataset <- dataset[, !(names(dataset) %in% unused_cols)]

# fifa21 is dataset without the columns at positions 65 to 90.
fifa21 <- dataset[, -(65:90)]
head(fifa21)
##      sofifa_id        short_name age nationality           club_name
## 4998    158023          L. Messi  33   Argentina        FC Barcelona
## 1727     20801 Cristiano Ronaldo  35    Portugal            Juventus
## 3806    200389          J. Oblak  27    Slovenia    Atlético Madrid
## 4690    192985      K. De Bruyne  29     Belgium     Manchester City
## 6197    190871         Neymar Jr  28      Brazil Paris Saint-Germain
## 6986    188545    R. Lewandowski  31      Poland  FC Bayern München
##                 league_name league_rank overall potential value_eur wage_eur
## 4998 Spain Primera Division           1      93        93  67500000   560000
## 1727        Italian Serie A           1      92        92  46000000   220000
## 3806 Spain Primera Division           1      91        93  75000000   125000
## 4690 English Premier League           1      91        91  87000000   370000
## 6197         French Ligue 1           1      91        91  90000000   270000
## 6986   German 1. Bundesliga           1      91        91  80000000   240000
##      player_positions     work_rate release_clause_eur
## 4998       RW, ST, CF    Medium/Low          138400000
## 1727           ST, LW      High/Low           75900000
## 3806               GK Medium/Medium          159400000
## 4690          CAM, CM     High/High          161000000
## 6197          LW, CAM   High/Medium          166500000
## 6986               ST   High/Medium          132000000
##                                                                                                                   player_tags
## 4998                            #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward
## 1727                            #Aerial Threat, #Dribbler, #Distance Shooter, #Acrobat, #Clinical Finisher, #Complete Forward
## 3806                                                                                                                         
## 4690                                        #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder
## 6197 #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Midfielder, #Complete Forward
## 6986                                                                                    #Distance Shooter, #Clinical Finisher
##      team_position contract_valid_until nation_position pace shooting passing
## 4998           CAM                 2021              RW   85       92      91
## 1727            LS                 2022              LS   89       93      81
## 3806            GK                 2023              GK   NA       NA      NA
## 4690           RCM                 2023             RCM   76       86      93
## 6197            LW                 2022                   91       85      86
## 6986            ST                 2023                   78       91      78
##      dribbling defending physic gk_diving gk_handling gk_kicking gk_reflexes
## 4998        95        38     65        NA          NA         NA          NA
## 1727        89        35     77        NA          NA         NA          NA
## 3806        NA        NA     NA        87          92         78          90
## 4690        88        64     78        NA          NA         NA          NA
## 6197        94        36     59        NA          NA         NA          NA
## 6986        85        43     82        NA          NA         NA          NA
##      gk_speed gk_positioning attacking_crossing attacking_finishing
## 4998       NA             NA                 85                  95
## 1727       NA             NA                 84                  95
## 3806       52             90                 13                  11
## 4690       NA             NA                 94                  82
## 6197       NA             NA                 85                  87
## 6986       NA             NA                 71                  94
##      attacking_heading_accuracy attacking_short_passing attacking_volleys
## 4998                         70                      91                88
## 1727                         90                      82                86
## 3806                         15                      43                13
## 4690                         55                      94                82
## 6197                         62                      87                87
## 6986                         85                      84                89
##      skill_dribbling skill_curve skill_fk_accuracy skill_long_passing
## 4998              96          93                94                 91
## 1727              88          81                76                 77
## 3806              12          13                14                 40
## 4690              88          85                83                 93
## 6197              95          88                89                 81
## 6986              85          79                85                 70
##      skill_ball_control movement_acceleration movement_sprint_speed
## 4998                 96                    91                    80
## 1727                 92                    87                    91
## 3806                 30                    43                    60
## 4690                 92                    77                    76
## 6197                 95                    94                    89
## 6986                 88                    77                    78
##      movement_agility movement_reactions movement_balance power_shot_power
## 4998               91                 94               95               86
## 1727               87                 95               71               94
## 3806               67                 88               49               59
## 4690               78                 91               76               91
## 6197               96                 91               83               80
## 6986               77                 93               82               89
##      power_jumping power_stamina power_strength power_long_shots
## 4998            68            72             69               94
## 1727            95            84             78               93
## 3806            78            41             78               12
## 4690            63            89             74               91
## 6197            62            81             50               84
## 6986            84            76             86               85
##      mentality_aggression mentality_interceptions mentality_positioning
## 4998                   44                      40                    93
## 1727                   63                      29                    95
## 3806                   34                      19                    11
## 4690                   76                      66                    88
## 6197                   51                      36                    87
## 6986                   81                      49                    94
##      mentality_vision mentality_penalties mentality_composure defending_marking
## 4998               95                  75                  96                NA
## 1727               82                  84                  95                NA
## 3806               65                  11                  68                NA
## 4690               94                  84                  91                NA
## 6197               90                  92                  93                NA
## 6986               79                  88                  88                NA
##      defending_standing_tackle defending_sliding_tackle goalkeeping_diving
## 4998                        35                       24                  6
## 1727                        32                       24                  7
## 3806                        12                       18                 87
## 4690                        65                       53                 15
## 6197                        30                       29                  9
## 6986                        42                       19                 15
##      goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
## 4998                   11                  15                      14
## 1727                   11                  15                      14
## 3806                   92                  78                      90
## 4690                   13                   5                      10
## 6197                    9                  15                      15
## 6986                    6                  12                       8
##      goalkeeping_reflexes improv_overall improv_potential target_overall
## 4998                    8              0                0              0
## 1727                   11             -2               -2             -2
## 3806                   90              4                2              0
## 4690                   13              3                0              0
## 6197                   11             -1               -4             -4
## 6986                   10              1                1              1

To predict the market value of the players.

This question is addressed by predicting players' market values from their strengths, so that clubs can estimate how much they need to spend and spend it on the right player.

# Count and list the distinct team_position values before any clean-up.
positions <- factor(fifa21[["team_position"]])
length(levels(positions))
## [1] 30
levels(positions)
##  [1] ""    "CAM" "CB"  "CDM" "CF"  "CM"  "GK"  "LAM" "LB"  "LCB" "LCM" "LDM"
## [13] "LF"  "LM"  "LS"  "LW"  "LWB" "RAM" "RB"  "RCB" "RCM" "RDM" "RES" "RF" 
## [25] "RM"  "RS"  "RW"  "RWB" "ST"  "SUB"
# position_filter() is defined in fun.R; it rewrites team_position using
# player_positions as a second input.
# NOTE(review): presumably it replaces the blank, RES and SUB entries with a
# real position - confirm in fun.R.
source("fun.R")
fifa21[["team_position"]] <- with(fifa21, position_filter(team_position, player_positions))

# Count and list the distinct positions again after the filter.
positions <- factor(fifa21[["team_position"]])
length(levels(positions))
## [1] 27
levels(positions)
##  [1] "CAM" "CB"  "CDM" "CF"  "CM"  "GK"  "LAM" "LB"  "LCB" "LCM" "LDM" "LF" 
## [13] "LM"  "LS"  "LW"  "LWB" "RAM" "RB"  "RCB" "RCM" "RDM" "RF"  "RM"  "RS" 
## [25] "RW"  "RWB" "ST"
# Collapse the centre/left/right variants of each role into one label.
# Each entry maps a new label to the original codes it replaces; the entries
# are applied in the order listed.
position_groups <- list(
  AMF = c("CAM", "LAM", "RAM"),
  DMF = c("CDM", "LDM", "RDM"),
  CMF = c("CM", "LCM", "RCM"),
  FB = c("CB", "LCB", "RCB"),
  FW = c("CF", "LF", "RF"),
  SS = c("LS", "RS"),
  LB = "LWB",
  RB = "RWB"
)
for (label in names(position_groups)) {
  fifa21$team_position[fifa21$team_position %in% position_groups[[label]]] <- label
}

# Count and list the distinct positions after grouping.
positions <- factor(fifa21[["team_position"]])
length(levels(positions))
## [1] 14
levels(positions)
##  [1] "AMF" "CMF" "DMF" "FB"  "FW"  "GK"  "LB"  "LM"  "LW"  "RB"  "RM"  "RW" 
## [13] "SS"  "ST"
# Modelling sample: players whose overall rating improved. It is taken before
# the column drop below, so it keeps every column of fifa21.
samplefifa21 <- fifa21[fifa21[["improv_overall"]] > 0, ]

# Remove columns not needed for the correlation analysis.
dropped_cols <- c("league_rank", "player_positions", "player_tags", "nation_position")
fifa21 <- fifa21[, !(names(fifa21) %in% dropped_cols)]

# Correlation input: numeric columns only, minus the id and contract year.
nums <- vapply(fifa21, is.numeric, logical(1))
corr_set <- fifa21[, nums]
corr_set <- corr_set[, !(names(corr_set) %in% c("sofifa_id", "contract_valid_until"))]
summary(corr_set)
##       age           overall        potential       value_eur        
##  Min.   :17.00   Min.   :49.00   Min.   :52.00   Min.   :        0  
##  1st Qu.:25.00   1st Qu.:66.00   1st Qu.:67.00   1st Qu.:   525000  
##  Median :28.00   Median :69.00   Median :71.00   Median :  1100000  
##  Mean   :27.82   Mean   :69.76   Mean   :71.81   Mean   :  3719386  
##  3rd Qu.:30.00   3rd Qu.:73.00   3rd Qu.:75.00   3rd Qu.:  4000000  
##  Max.   :43.00   Max.   :93.00   Max.   :95.00   Max.   :105500000  
##                                                                     
##     wage_eur      release_clause_eur       pace          shooting    
##  Min.   :     0   Min.   :    13000   Min.   :28.00   Min.   :16.00  
##  1st Qu.:  2000   1st Qu.:   878000   1st Qu.:60.00   1st Qu.:46.00  
##  Median :  6000   Median :  2000000   Median :69.00   Median :59.00  
##  Mean   : 15193   Mean   :  7080571   Mean   :67.38   Mean   :56.26  
##  3rd Qu.: 17000   3rd Qu.:  7600000   3rd Qu.:76.00   3rd Qu.:67.00  
##  Max.   :560000   Max.   :203100000   Max.   :96.00   Max.   :93.00  
##                   NA's   :379         NA's   :940     NA's   :940    
##     passing        dribbling       defending         physic     
##  Min.   :25.00   Min.   :28.00   Min.   :17.00   Min.   :35.00  
##  1st Qu.:56.00   1st Qu.:61.00   1st Qu.:41.00   1st Qu.:64.00  
##  Median :62.00   Median :67.00   Median :61.00   Median :70.00  
##  Mean   :61.72   Mean   :65.79   Mean   :56.01   Mean   :68.61  
##  3rd Qu.:68.00   3rd Qu.:72.00   3rd Qu.:68.00   3rd Qu.:74.00  
##  Max.   :93.00   Max.   :95.00   Max.   :91.00   Max.   :91.00  
##  NA's   :940     NA's   :940     NA's   :940     NA's   :940    
##    gk_diving      gk_handling      gk_kicking     gk_reflexes      gk_speed    
##  Min.   :48.00   Min.   :43.00   Min.   :40.00   Min.   :48.0   Min.   :12.00  
##  1st Qu.:65.00   1st Qu.:63.00   1st Qu.:61.00   1st Qu.:66.0   1st Qu.:36.00  
##  Median :69.00   Median :67.00   Median :65.00   Median :70.0   Median :43.00  
##  Mean   :69.71   Mean   :67.02   Mean   :65.73   Mean   :70.7   Mean   :42.36  
##  3rd Qu.:74.00   3rd Qu.:71.00   3rd Qu.:70.00   3rd Qu.:75.0   3rd Qu.:48.00  
##  Max.   :90.00   Max.   :92.00   Max.   :93.00   Max.   :90.0   Max.   :65.00  
##  NA's   :7397    NA's   :7397    NA's   :7397    NA's   :7397   NA's   :7397   
##  gk_positioning  attacking_crossing attacking_finishing
##  Min.   :46.00   Min.   : 6.00      Min.   : 5.00      
##  1st Qu.:64.00   1st Qu.:44.00      1st Qu.:33.00      
##  Median :68.00   Median :59.00      Median :53.00      
##  Mean   :68.33   Mean   :53.85      Mean   :48.74      
##  3rd Qu.:73.00   3rd Qu.:68.00      3rd Qu.:65.00      
##  Max.   :91.00   Max.   :94.00      Max.   :95.00      
##  NA's   :7397                                          
##  attacking_heading_accuracy attacking_short_passing attacking_volleys
##  Min.   : 5.00              Min.   :11.00           Min.   : 5.00    
##  1st Qu.:48.00              1st Qu.:59.00           1st Qu.:32.00    
##  Median :59.00              Median :66.00           Median :49.00    
##  Mean   :55.37              Mean   :62.62           Mean   :46.52    
##  3rd Qu.:68.00              3rd Qu.:72.00           3rd Qu.:61.00    
##  Max.   :93.00              Max.   :94.00           Max.   :90.00    
##                                                                      
##  skill_dribbling  skill_curve    skill_fk_accuracy skill_long_passing
##  Min.   : 5.00   Min.   : 6.00   Min.   : 6.00     Min.   : 9.00     
##  1st Qu.:53.00   1st Qu.:38.00   1st Qu.:33.00     1st Qu.:51.00     
##  Median :64.00   Median :56.00   Median :48.00     Median :61.00     
##  Mean   :58.62   Mean   :51.75   Mean   :46.76     Mean   :57.35     
##  3rd Qu.:71.00   3rd Qu.:67.00   3rd Qu.:62.00     3rd Qu.:67.00     
##  Max.   :96.00   Max.   :94.00   Max.   :94.00     Max.   :93.00     
##                                                                      
##  skill_ball_control movement_acceleration movement_sprint_speed
##  Min.   : 5.00      Min.   :13.00         Min.   :12.00        
##  1st Qu.:59.00      1st Qu.:55.00         1st Qu.:55.00        
##  Median :66.00      Median :67.00         Median :67.00        
##  Mean   :62.14      Mean   :64.48         Mean   :64.61        
##  3rd Qu.:72.00      3rd Qu.:75.00         3rd Qu.:75.00        
##  Max.   :96.00      Max.   :97.00         Max.   :96.00        
##                                                                
##  movement_agility movement_reactions movement_balance power_shot_power
##  Min.   :14.00    Min.   :31.00      Min.   :18.0     Min.   :18.0    
##  1st Qu.:58.00    1st Qu.:61.00      1st Qu.:56.0     1st Qu.:53.0    
##  Median :68.00    Median :66.00      Median :66.0     Median :64.0    
##  Mean   :65.43    Mean   :66.44      Mean   :64.4     Mean   :62.4    
##  3rd Qu.:75.00    3rd Qu.:71.00      3rd Qu.:74.0     3rd Qu.:72.0    
##  Max.   :96.00    Max.   :95.00      Max.   :97.0     Max.   :95.0    
##                                                                       
##  power_jumping   power_stamina   power_strength  power_long_shots
##  Min.   :15.00   Min.   :14.00   Min.   :24.00   Min.   : 4.00   
##  1st Qu.:61.00   1st Qu.:61.00   1st Qu.:62.00   1st Qu.:38.00   
##  Median :69.00   Median :70.00   Median :70.00   Median :57.00   
##  Mean   :67.54   Mean   :66.14   Mean   :68.58   Mean   :51.42   
##  3rd Qu.:76.00   3rd Qu.:76.00   3rd Qu.:76.00   3rd Qu.:66.00   
##  Max.   :95.00   Max.   :97.00   Max.   :97.00   Max.   :94.00   
##                                                                  
##  mentality_aggression mentality_interceptions mentality_positioning
##  Min.   :10.00        Min.   : 6.00           Min.   : 3.00        
##  1st Qu.:50.00        1st Qu.:32.00           1st Qu.:43.00        
##  Median :65.00        Median :59.00           Median :60.00        
##  Mean   :60.55        Mean   :51.34           Mean   :53.67        
##  3rd Qu.:73.00        3rd Qu.:68.00           3rd Qu.:68.00        
##  Max.   :95.00        Max.   :91.00           Max.   :95.00        
##                                                                    
##  mentality_vision mentality_penalties mentality_composure
##  Min.   :10.00    Min.   : 7.00       Min.   :12.00      
##  1st Qu.:49.00    1st Qu.:40.00       1st Qu.:58.00      
##  Median :60.00    Median :53.00       Median :65.00      
##  Mean   :57.93    Mean   :51.01       Mean   :63.68      
##  3rd Qu.:68.00    3rd Qu.:64.00       3rd Qu.:70.00      
##  Max.   :95.00    Max.   :92.00       Max.   :96.00      
##                                                          
##  defending_standing_tackle defending_sliding_tackle goalkeeping_diving
##  Min.   : 7.00             Min.   : 6.00            Min.   : 1.00     
##  1st Qu.:31.00             1st Qu.:27.00            1st Qu.: 8.00     
##  Median :61.00             Median :58.00            Median :11.00     
##  Mean   :51.39             Mean   :48.92            Mean   :17.39     
##  3rd Qu.:69.00             3rd Qu.:67.00            3rd Qu.:14.00     
##  Max.   :93.00             Max.   :90.00            Max.   :90.00     
##                                                                       
##  goalkeeping_handling goalkeeping_kicking goalkeeping_positioning
##  Min.   : 1.00        Min.   : 1.00       Min.   : 1.00          
##  1st Qu.: 9.00        1st Qu.: 9.00       1st Qu.: 8.00          
##  Median :11.00        Median :11.00       Median :11.00          
##  Mean   :17.16        Mean   :17.03       Mean   :17.27          
##  3rd Qu.:14.00        3rd Qu.:14.00       3rd Qu.:14.00          
##  Max.   :92.00        Max.   :93.00       Max.   :91.00          
##                                                                  
##  goalkeeping_reflexes improv_overall    improv_potential  target_overall   
##  Min.   : 1.0         Min.   :-24.000   Min.   :-19.000   Min.   :-26.000  
##  1st Qu.: 8.0         1st Qu.: -2.000   1st Qu.: -5.000   1st Qu.: -7.000  
##  Median :11.0         Median :  1.000   Median : -2.000   Median : -3.000  
##  Mean   :17.5         Mean   :  1.846   Mean   : -1.385   Mean   : -3.434  
##  3rd Qu.:14.0         3rd Qu.:  5.000   3rd Qu.:  1.000   3rd Qu.:  0.000  
##  Max.   :90.0         Max.   : 29.000   Max.   : 23.000   Max.   : 23.000  
## 

# Goalkeeper set: rows that have a gk_diving value, restricted to the columns
# with no missing values among those rows.
corr_set1 <- corr_set[!is.na(corr_set$gk_diving), ]
corr_set1 <- corr_set1[, !vapply(corr_set1, anyNA, logical(1))]

# Remaining players: rows that have a pace value, restricted the same way.
corr_set2 <- corr_set[!is.na(corr_set$pace), ]
corr_set2 <- corr_set2[, !vapply(corr_set2, anyNA, logical(1))]

# Correlation matrices, rounded to two decimals.
cormat1 <- round(cor(corr_set1), 2)
cormat2 <- round(cor(corr_set2), 2)

# Heatmap of the goalkeeper correlations; the small base font keeps the many
# axis labels legible.
mcorr_set1 <- melt(cormat1)
theme_set(theme_gray(base_size = 5))
g <- ggplot(mcorr_set1, aes(x = Var1, y = Var2, fill = value)) +
  geom_raster() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(low = "blue", high = "red", guide = "colorbar")
ggplotly(g)

From the above matrix, we can see the critical attributes for goalkeepers, and these factors can be used to predict the market values of players.


# Heatmap of the second correlation matrix (cormat2), drawn with the same
# theme and colour scale: blue for low values, red for high values.
mcorr_set2 <- melt(cormat2)
theme_set(theme_gray(base_size = 5))
g <- ggplot(mcorr_set2, aes(x = Var1, y = Var2, fill = value)) +
  geom_raster() +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_fill_gradient2(low = "blue", high = "red", guide = "colorbar")
ggplotly(g)

From the above matrix, we can see the attributes affecting the non-goalkeepers, including the positively correlated factors for strikers, defenders and midfielders.


# Split the improved-player sample into four role groups by team_position,
# printing the dimensions of each group.
role <- samplefifa21$team_position

goalkeeper <- samplefifa21[role == "GK", ]
dim(goalkeeper)
## [1] 570  67
defenders <- samplefifa21[role == "FB" | role == "LB" | role == "RB", ]
dim(defenders)
## [1] 1585   67
midfielders <- samplefifa21[
  role == "CMF" | role == "DMF" | role == "AMF" | role == "RM" | role == "LM",
]
dim(midfielders)
## [1] 1618   67
forwards <- samplefifa21[
  role == "FW" | role == "LW" | role == "RW" | role == "SS" | role == "ST",
]
dim(forwards)
## [1] 770  67
# 80/20 train/validation split of the goalkeepers.
# The seed pins the random split so that the fitted model and its accuracy
# figures can be reproduced from one run to the next.
set.seed(2021)
# seq_len() stays empty for an empty table, where seq(1, 0) would yield c(1, 0).
train.index <- sample(seq_len(nrow(goalkeeper)), floor(0.8 * nrow(goalkeeper)))

# Model columns selected by name rather than by position, so the selection
# does not silently shift if the column layout of the table changes.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "gk_diving", "gk_handling", "gk_kicking", "gk_reflexes", "gk_speed",
  "gk_positioning", "movement_reactions", "power_shot_power"
)

train.t <- goalkeeper[train.index, selected.var]
valid.t <- goalkeeper[-train.index, selected.var]
head(valid.t)
##         short_name age overall potential value_eur wage_eur gk_diving
## 2364       Ederson  26      88        91  53500000   195000        86
## 4717      K. Navas  33      87        87  27000000   110000        90
## 8245     Y. Sommer  31      86        86  28500000    55000        80
## 7086     R. Bürki  29      84        85  25000000    67000        85
## 5200 L. Hrádecký  30      83        83  17500000    63000        85
## 6209       N. Pope  28      82        83  18500000    59000        79
##      gk_handling gk_kicking gk_reflexes gk_speed gk_positioning
## 2364          82         93          88       63             86
## 4717          81         75          90       53             82
## 8245          86         85          85       51             87
## 7086          82         72          88       47             82
## 5200          78         69          87       41             83
## 6209          81         75          84       48             83
##      movement_reactions power_shot_power
## 2364                 87               70
## 4717                 84               56
## 8245                 84               64
## 7086                 82               54
## 5200                 82               52
## 6209                 75               56
# Linear model of goalkeeper market value (value_eur) on ratings, age, wage
# and several interaction terms, fitted on the training rows.
gklm <- lm(
  value_eur ~ overall * age + gk_reflexes * movement_reactions + wage_eur +
    gk_diving * gk_positioning + potential + gk_speed +
    gk_handling * gk_kicking * power_shot_power,
  data = train.t
)
summary(gklm)
## 
## Call:
## lm(formula = value_eur ~ overall * age + gk_reflexes * movement_reactions + 
##     wage_eur + gk_diving * gk_positioning + potential + gk_speed + 
##     gk_handling * gk_kicking * power_shot_power, data = train.t)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -9531534  -775268    29726   774259 25162412 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             -4.736e+08  6.170e+07  -7.676 1.08e-13
## overall                                  3.141e+06  3.401e+05   9.236  < 2e-16
## age                                      4.557e+06  3.419e+05  13.328  < 2e-16
## gk_reflexes                             -1.313e+06  1.806e+05  -7.272 1.66e-12
## movement_reactions                      -1.352e+06  1.818e+05  -7.438 5.49e-13
## wage_eur                                 6.546e+01  8.989e+00   7.282 1.55e-12
## gk_diving                               -2.462e+06  2.534e+05  -9.717  < 2e-16
## gk_positioning                          -2.436e+06  2.599e+05  -9.373  < 2e-16
## potential                                1.557e+05  5.862e+04   2.655  0.00821
## gk_speed                                -6.468e+03  1.191e+04  -0.543  0.58738
## gk_handling                              7.299e+06  1.036e+06   7.047 7.18e-12
## gk_kicking                               1.595e+07  3.453e+06   4.620 5.05e-06
## power_shot_power                         2.946e+06  4.237e+06   0.695  0.48730
## overall:age                             -6.675e+04  4.729e+03 -14.115  < 2e-16
## gk_reflexes:movement_reactions           1.778e+04  2.534e+03   7.018 8.66e-12
## gk_diving:gk_positioning                 3.190e+04  3.605e+03   8.850  < 2e-16
## gk_handling:gk_kicking                  -2.249e+05  5.119e+04  -4.394 1.40e-05
## gk_handling:power_shot_power            -3.206e+04  6.229e+04  -0.515  0.60702
## gk_kicking:power_shot_power             -1.975e+05  1.899e+04 -10.403  < 2e-16
## gk_handling:gk_kicking:power_shot_power  2.702e+03  2.781e+02   9.714  < 2e-16
##                                            
## (Intercept)                             ***
## overall                                 ***
## age                                     ***
## gk_reflexes                             ***
## movement_reactions                      ***
## wage_eur                                ***
## gk_diving                               ***
## gk_positioning                          ***
## potential                               ** 
## gk_speed                                   
## gk_handling                             ***
## gk_kicking                              ***
## power_shot_power                           
## overall:age                             ***
## gk_reflexes:movement_reactions          ***
## gk_diving:gk_positioning                ***
## gk_handling:gk_kicking                  ***
## gk_handling:power_shot_power               
## gk_kicking:power_shot_power             ***
## gk_handling:gk_kicking:power_shot_power ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2120000 on 436 degrees of freedom
## Multiple R-squared:  0.9225, Adjusted R-squared:  0.9192 
## F-statistic: 273.3 on 19 and 436 DF,  p-value: < 2.2e-16
# Predict the validation rows and compare the predictions with the actual
# market values.
gklm.pred <- predict(gklm, newdata = valid.t)
accuracy(gklm.pred, valid.t[["value_eur"]])
##                ME    RMSE     MAE       MPE     MAPE
## Test set 80304.58 1873938 1235118 -70.33823 361.9268
# Show the name and listed value of the goalkeeper(s) rated 85 overall at age 30.
goalkeeper[
  with(goalkeeper, overall == 85 & age == 30),
  c("short_name", "value_eur")
]
##       short_name value_eur
## 6464 P. Gulácsi  26000000
# Predict the market value for a single goalkeeper profile entered by hand
# (overall 85, age 30 -- presumably the player looked up just before; the
# attribute values cannot be checked from this section).
predict(
  gklm,
  newdata = data.frame(
    overall = 85, age = 30, wage_eur = 65000,
    gk_diving = 84, potential = 85, gk_handling = 85,
    gk_kicking = 82, gk_reflexes = 86, gk_speed = 43,
    gk_positioning = 84, movement_reactions = 82, power_shot_power = 62
  )
)
##        1 
## 29996853

The model above is used to predict P. Gulácsi's market value.


# Hold out 20% of the defenders for validation: draw 80% of the row numbers
# at random for training; the remaining rows form the validation set.
# NOTE(review): no set.seed() call is visible in this section, so the split
# (and every figure derived from it) changes between runs unless a seed is
# set earlier in the file -- confirm.
train.index <- sample(seq_len(nrow(defenders)), floor(0.8 * nrow(defenders)))

# Identifier, target (value_eur) and predictors for the defender model.
# Columns are selected by name rather than by position so that a change in
# the column order of `defenders` cannot silently pick the wrong variables.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "defending", "physic", "mentality_aggression", "mentality_interceptions",
  "defending_standing_tackle", "defending_sliding_tackle"
)

train.t <- defenders[train.index, selected.var]
valid.t <- defenders[-train.index, selected.var]

head(valid.t)
##             short_name age overall potential value_eur wage_eur defending
## 6920   Ricardo Pereira  26      85        87  40500000   120000        81
## 2161         D. Djené  28      82        83  21500000    39000        84
## 4459 Josué Chiamulera  28      82        82  19500000    41000        85
## 5092     L. Hernández  24      82        87  26500000    70000        83
## 369       Adryan Zonta  28      81        81  15000000      500        72
## 4486    Juiano Mestres  24      81        81         0        0        82
##      physic mentality_aggression mentality_interceptions
## 6920     76                   79                      81
## 2161     80                   88                      85
## 4459     79                   88                      84
## 5092     79                   86                      83
## 369      77                   69                      74
## 4486     80                   86                      82
##      defending_standing_tackle defending_sliding_tackle
## 6920                        84                       83
## 2161                        86                       83
## 4459                        87                       82
## 5092                        84                       86
## 369                         72                       69
## 4486                        85                       84
# Defender valuation model: linear regression of value_eur on rating, age,
# wage and defensive attributes. `*` expands to main effects plus all
# interactions, so the formula contains wage x potential, defending x sliding
# tackle and a three-way interceptions x aggression x physic term.
dflm <- lm(
  value_eur ~ overall + age +
    wage_eur * potential +
    defending * defending_sliding_tackle +
    defending_standing_tackle +
    mentality_interceptions * mentality_aggression * physic,
  data = train.t
)
summary(dflm)
## 
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur * potential + 
##     defending * defending_sliding_tackle + defending_standing_tackle + 
##     mentality_interceptions * mentality_aggression * physic, 
##     data = train.t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15425254   -906135   -182308    742276  19891571 
## 
## Coefficients:
##                                                       Estimate Std. Error
## (Intercept)                                         -1.140e+08  2.655e+07
## overall                                              6.875e+05  3.545e+04
## age                                                 -2.762e+05  2.971e+04
## wage_eur                                            -1.444e+03  4.192e+01
## potential                                           -1.951e+05  3.057e+04
## defending                                           -5.358e+05  8.929e+04
## defending_sliding_tackle                            -5.080e+05  7.384e+04
## defending_standing_tackle                            6.335e+04  3.161e+04
## mentality_interceptions                              1.846e+06  4.196e+05
## mentality_aggression                                 2.256e+06  3.938e+05
## physic                                               1.810e+06  4.009e+05
## wage_eur:potential                                   1.828e+01  4.916e-01
## defending:defending_sliding_tackle                   7.760e+03  1.111e+03
## mentality_interceptions:mentality_aggression        -3.374e+04  5.957e+03
## mentality_interceptions:physic                      -2.737e+04  6.178e+03
## mentality_aggression:physic                         -3.324e+04  5.656e+03
## mentality_interceptions:mentality_aggression:physic  4.960e+02  8.475e+01
##                                                     t value Pr(>|t|)    
## (Intercept)                                          -4.296 1.88e-05 ***
## overall                                              19.393  < 2e-16 ***
## age                                                  -9.298  < 2e-16 ***
## wage_eur                                            -34.460  < 2e-16 ***
## potential                                            -6.384 2.43e-10 ***
## defending                                            -6.001 2.56e-09 ***
## defending_sliding_tackle                             -6.879 9.50e-12 ***
## defending_standing_tackle                             2.004   0.0453 *  
## mentality_interceptions                               4.399 1.18e-05 ***
## mentality_aggression                                  5.729 1.27e-08 ***
## physic                                                4.515 6.93e-06 ***
## wage_eur:potential                                   37.190  < 2e-16 ***
## defending:defending_sliding_tackle                    6.984 4.65e-12 ***
## mentality_interceptions:mentality_aggression         -5.664 1.84e-08 ***
## mentality_interceptions:physic                       -4.430 1.02e-05 ***
## mentality_aggression:physic                          -5.877 5.34e-09 ***
## mentality_interceptions:mentality_aggression:physic   5.852 6.18e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2053000 on 1251 degrees of freedom
## Multiple R-squared:  0.9163, Adjusted R-squared:  0.9152 
## F-statistic: 855.8 on 16 and 1251 DF,  p-value: < 2.2e-16
# Score the held-out defenders and report the forecast error metrics.
# The validation preview contains a player with value_eur == 0, and the
# percentage metrics (MPE/MAPE) are reported as non-finite; presumably they
# divide by the actual value -- confirm, and treat only ME/RMSE/MAE as
# meaningful here.
dflm.pred <- predict(dflm, newdata = valid.t)
accuracy(dflm.pred, valid.t[["value_eur"]])
##                 ME    RMSE     MAE  MPE MAPE
## Test set -103471.1 1870189 1184071 -Inf  Inf
# Show the name and listed value of the defender(s) rated 86 overall at age 27.
defenders[
  with(defenders, overall == 86 & age == 27),
  c("short_name", "value_eur")
]
##      short_name value_eur
## 6841  R. Varane  46500000
# Predict the market value for a single defender profile entered by hand
# (overall 86, age 27 -- presumably the player looked up just before; the
# attribute values cannot be checked from this section).
predict(
  dflm,
  newdata = data.frame(
    overall = 86, age = 27, wage_eur = 230000, potential = 86,
    defending = 82, defending_standing_tackle = 83,
    defending_sliding_tackle = 85, mentality_aggression = 82,
    mentality_interceptions = 83, physic = 80
  )
)
##        1 
## 46060782

The model above is used to predict R. Varane's market value.


# Hold out 20% of the midfielders for validation: draw 80% of the row numbers
# at random for training; the remaining rows form the validation set.
# NOTE(review): no set.seed() call is visible in this section, so the split
# (and every figure derived from it) changes between runs unless a seed is
# set earlier in the file -- confirm.
train.index <- sample(
  seq_len(nrow(midfielders)),
  floor(0.8 * nrow(midfielders))
)

# Identifier, target (value_eur) and predictors for the midfielder model.
# Columns are selected by name rather than by position so that a change in
# the column order of `midfielders` cannot silently pick the wrong variables.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "passing", "dribbling", "attacking_short_passing", "skill_dribbling",
  "skill_ball_control", "power_long_shots", "mentality_positioning",
  "mentality_vision"
)

train.t <- midfielders[train.index, selected.var]
valid.t <- midfielders[-train.index, selected.var]

head(valid.t)
##        short_name age overall potential value_eur wage_eur passing dribbling
## 4690 K. De Bruyne  29      91        91  87000000   370000      93        88
## 4448   J. Kimmich  25      88        90  65000000   145000      86        84
## 4242 J. Henderson  30      86        86  36500000   140000      84        79
## 1531   C. Eriksen  28      85        85  39000000   155000      88        81
## 3266    H. Ziyech  27      85        86  44000000   140000      87        84
## 7461    S. Gnabry  24      85        87  47500000   100000      78        86
##      attacking_short_passing skill_dribbling skill_ball_control
## 4690                      94              88                 92
## 4448                      87              83                 85
## 4242                      86              78                 84
## 1531                      90              77                 90
## 3266                      86              83                 85
## 7461                      79              87                 83
##      power_long_shots mentality_positioning mentality_vision
## 4690               91                    88               94
## 4448               84                    80               84
## 4242               75                    78               83
## 1531               89                    82               90
## 3266               77                    82               89
## 7461               81                    85               83
# Midfielder valuation model: linear regression of value_eur on rating, age,
# wage and playmaking attributes. `*` expands to main effects plus all
# interactions, so the formula contains passing x short passing, dribbling x
# skill dribbling and a three-way positioning x vision x long shots term.
mflm <- lm(
  value_eur ~ overall + age + wage_eur +
    passing * attacking_short_passing +
    dribbling * skill_dribbling +
    skill_ball_control +
    mentality_positioning * mentality_vision * power_long_shots,
  data = train.t
)
summary(mflm)
## 
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur + passing * 
##     attacking_short_passing + dribbling * skill_dribbling + skill_ball_control + 
##     mentality_positioning * mentality_vision * power_long_shots, 
##     data = train.t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -10709161  -1054194    136739   1095472  21904568 
## 
## Coefficients:
##                                                           Estimate Std. Error
## (Intercept)                                             -4.952e+06  1.541e+07
## overall                                                  5.280e+05  3.303e+04
## age                                                     -2.829e+05  2.384e+04
## wage_eur                                                 1.347e+02  4.318e+00
## passing                                                 -1.204e+06  1.262e+05
## attacking_short_passing                                 -1.076e+06  1.145e+05
## dribbling                                               -8.530e+05  1.093e+05
## skill_dribbling                                         -8.908e+05  9.868e+04
## skill_ball_control                                      -6.981e+04  3.627e+04
## mentality_positioning                                    1.898e+06  2.226e+05
## mentality_vision                                         1.914e+06  2.143e+05
## power_long_shots                                         2.304e+06  2.105e+05
## passing:attacking_short_passing                          1.661e+04  1.703e+03
## dribbling:skill_dribbling                                1.280e+04  1.310e+03
## mentality_positioning:mentality_vision                  -3.040e+04  3.511e+03
## mentality_positioning:power_long_shots                  -3.682e+04  3.497e+03
## mentality_vision:power_long_shots                       -3.699e+04  3.409e+03
## mentality_positioning:mentality_vision:power_long_shots  5.862e+02  5.240e+01
##                                                         t value Pr(>|t|)    
## (Intercept)                                              -0.321   0.7480    
## overall                                                  15.985  < 2e-16 ***
## age                                                     -11.866  < 2e-16 ***
## wage_eur                                                 31.206  < 2e-16 ***
## passing                                                  -9.545  < 2e-16 ***
## attacking_short_passing                                  -9.394  < 2e-16 ***
## dribbling                                                -7.805 1.23e-14 ***
## skill_dribbling                                          -9.027  < 2e-16 ***
## skill_ball_control                                       -1.925   0.0545 .  
## mentality_positioning                                     8.526  < 2e-16 ***
## mentality_vision                                          8.934  < 2e-16 ***
## power_long_shots                                         10.947  < 2e-16 ***
## passing:attacking_short_passing                           9.754  < 2e-16 ***
## dribbling:skill_dribbling                                 9.774  < 2e-16 ***
## mentality_positioning:mentality_vision                   -8.657  < 2e-16 ***
## mentality_positioning:power_long_shots                  -10.531  < 2e-16 ***
## mentality_vision:power_long_shots                       -10.852  < 2e-16 ***
## mentality_positioning:mentality_vision:power_long_shots  11.188  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2515000 on 1276 degrees of freedom
## Multiple R-squared:  0.9004, Adjusted R-squared:  0.8991 
## F-statistic: 678.6 on 17 and 1276 DF,  p-value: < 2.2e-16
# Score the held-out midfielders and report the forecast error metrics.
# NOTE(review): MPE/MAPE are reported as non-finite for this split,
# presumably because some validation players have value_eur == 0 -- confirm;
# only ME/RMSE/MAE are meaningful here.
mflm.pred <- predict(mflm, newdata = valid.t)
accuracy(mflm.pred, valid.t[["value_eur"]])
##                ME    RMSE     MAE MPE MAPE
## Test set 8733.147 2937471 1783696 NaN  Inf
# Show the name and listed value of the midfielder(s) rated 91 overall at age 29.
midfielders[
  with(midfielders, overall == 91 & age == 29),
  c("short_name", "value_eur")
]
##        short_name value_eur
## 4690 K. De Bruyne  87000000
# Predict K. De Bruyne's market value from his attribute values as listed in
# the validation-set preview (row 4690).
# skill_ball_control is 92 in the data; it was previously entered here as 93,
# so a result printed before this correction differs slightly from a re-run.
# `potential` is not a term in mflm, so predict() does not use it.
predict(
  mflm,
  newdata = data.frame(
    overall = 91, age = 29, wage_eur = 370000, potential = 91,
    passing = 93, dribbling = 88, attacking_short_passing = 94,
    skill_dribbling = 88, skill_ball_control = 92, power_long_shots = 91,
    mentality_positioning = 88, mentality_vision = 94
  )
)
##        1 
## 91200362

The model above is used to predict K. De Bruyne's market value.


# Hold out 20% of the forwards for validation: draw 80% of the row numbers
# at random for training; the remaining rows form the validation set.
# NOTE(review): no set.seed() call is visible in this section, so the split
# (and every figure derived from it) changes between runs unless a seed is
# set earlier in the file -- confirm.
train.index <- sample(seq_len(nrow(forwards)), floor(0.8 * nrow(forwards)))

# Identifier, target (value_eur) and predictors for the forward model.
# Columns are selected by name rather than by position so that a change in
# the column order of `forwards` cannot silently pick the wrong variables.
selected.var <- c(
  "short_name", "age", "overall", "potential", "value_eur", "wage_eur",
  "pace", "shooting", "attacking_finishing", "attacking_volleys",
  "skill_curve", "movement_acceleration", "movement_sprint_speed",
  "power_shot_power", "attacking_crossing"
)

train.t <- forwards[train.index, selected.var]
valid.t <- forwards[-train.index, selected.var]

head(valid.t)
##         short_name age overall potential value_eur wage_eur pace shooting
## 4604    K. Benzema  32      89        89  53000000   350000   74       85
## 6588     P. Dybala  26      88        89  71000000   190000   85       85
## 2210    D. Mertens  33      85        85  24000000   115000   86       82
## 5841      M. Depay  26      85        88  48500000   115000   86       83
## 8220 W. Ben Yedder  29      84        84  31000000    91000   84       83
## 5077    L. Ocampos  25      82        83  26500000    40000   82       83
##      attacking_finishing attacking_volleys skill_curve movement_acceleration
## 4604                  88                86          81                    77
## 6588                  84                88          88                    89
## 2210                  84                70          82                    90
## 5841                  83                74          85                    85
## 8220                  88                84          80                    86
## 5077                  85                81          80                    80
##      movement_sprint_speed power_shot_power attacking_crossing
## 4604                    72               84                 75
## 6588                    82               82                 82
## 2210                    82               80                 79
## 5841                    87               87                 83
## 8220                    82               83                 74
## 5077                    83               84                 79
# Forward valuation model: linear regression of value_eur on rating, age,
# wage and attacking attributes. `*` expands to main effects plus all
# interactions, so the formula contains a three-way shooting x finishing x
# shot power term and a crossing x curve term; the pace-related attributes
# enter as plain main effects.
flm <- lm(
  value_eur ~ overall + age + wage_eur +
    shooting * attacking_finishing * power_shot_power +
    attacking_volleys +
    attacking_crossing * skill_curve +
    movement_acceleration + pace + movement_sprint_speed,
  data = train.t
)
summary(flm)
## 
## Call:
## lm(formula = value_eur ~ overall + age + wage_eur + shooting * 
##     attacking_finishing * power_shot_power + attacking_volleys + 
##     attacking_crossing * skill_curve + movement_acceleration + 
##     pace + movement_sprint_speed, data = train.t)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -19476071  -1470218     91121   1364766  48663504 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                   -3.477e+08  5.965e+07  -5.828
## overall                                        5.394e+05  7.368e+04   7.321
## age                                           -3.549e+05  4.991e+04  -7.111
## wage_eur                                       1.538e+02  7.265e+00  21.166
## shooting                                       4.503e+06  1.165e+06   3.867
## attacking_finishing                            5.213e+06  1.058e+06   4.926
## power_shot_power                               6.781e+06  8.599e+05   7.886
## attacking_volleys                              5.511e+03  3.027e+04   0.182
## attacking_crossing                            -3.642e+05  5.139e+04  -7.088
## skill_curve                                   -3.156e+05  5.053e+04  -6.247
## movement_acceleration                         -2.006e+05  2.272e+05  -0.883
## pace                                           5.203e+05  5.032e+05   1.034
## movement_sprint_speed                         -2.855e+05  2.800e+05  -1.020
## shooting:attacking_finishing                  -6.885e+04  1.409e+04  -4.887
## shooting:power_shot_power                     -9.424e+04  1.459e+04  -6.459
## attacking_finishing:power_shot_power          -1.016e+05  1.554e+04  -6.538
## attacking_crossing:skill_curve                 6.257e+03  8.721e+02   7.174
## shooting:attacking_finishing:power_shot_power  1.391e+03  1.779e+02   7.816
##                                               Pr(>|t|)    
## (Intercept)                                   9.18e-09 ***
## overall                                       7.99e-13 ***
## age                                           3.31e-12 ***
## wage_eur                                       < 2e-16 ***
## shooting                                      0.000122 ***
## attacking_finishing                           1.09e-06 ***
## power_shot_power                              1.48e-14 ***
## attacking_volleys                             0.855611    
## attacking_crossing                            3.86e-12 ***
## skill_curve                                   7.93e-10 ***
## movement_acceleration                         0.377619    
## pace                                          0.301565    
## movement_sprint_speed                         0.308239    
## shooting:attacking_finishing                  1.32e-06 ***
## shooting:power_shot_power                     2.18e-10 ***
## attacking_finishing:power_shot_power          1.34e-10 ***
## attacking_crossing:skill_curve                2.16e-12 ***
## shooting:attacking_finishing:power_shot_power 2.47e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3599000 on 598 degrees of freedom
## Multiple R-squared:  0.893,  Adjusted R-squared:   0.89 
## F-statistic: 293.6 on 17 and 598 DF,  p-value: < 2.2e-16
# Score the held-out forwards and report the forecast error metrics.
# NOTE(review): MPE/MAPE are reported as non-finite for this split,
# presumably because some validation players have value_eur == 0 -- confirm;
# only ME/RMSE/MAE are meaningful here.
flm.pred <- predict(flm, newdata = valid.t)
accuracy(flm.pred, valid.t[["value_eur"]])
##                 ME    RMSE     MAE  MPE MAPE
## Test set -659114.9 4314508 2646498 -Inf  Inf
# Show the name and listed value of the forward(s) rated 87 overall at age 30.
forwards[
  with(forwards, overall == 87 & age == 30),
  c("short_name", "value_eur")
]
##       short_name value_eur
## 1601 C. Immobile  48500000
# Predict the market value for a single forward profile entered by hand
# (overall 87, age 30 -- presumably the player looked up just before; the
# attribute values cannot be checked from this section).
# `potential` is not a term in flm, so predict() does not use it.
predict(
  flm,
  newdata = data.frame(
    overall = 87, age = 30, wage_eur = 125000, potential = 87,
    pace = 84, shooting = 88, attacking_finishing = 93,
    attacking_volleys = 85, skill_curve = 70,
    movement_acceleration = 82, movement_sprint_speed = 85,
    power_shot_power = 86, attacking_crossing = 55
  )
)
##        1 
## 47027303

The model above is used to predict C. Immobile's market value.


Finding the best playing eleven under a given budget (€100 million)

To do this, we use the classic 4-3-3 football formation.

# Run the helper script (presumably where best_eleven() is defined --
# confirm), then select a squad from samplefifa21 for a budget of
# 100 million and print it.
source("fun2.R")
playingEleven <- best_eleven(budget = 1e8, samplefifa21)
print(playingEleven)
##           short_name age team_position value_eur
## 4177        J. Omlin  26            GK   8500000
## 3762       J. Justin  22            LB   5500000
## 206          A. Long  27            FB   7000000
## 8081      V. Nelsson  21            FB   8000000
## 107      T. Tomiyasu  21            RB   4900000
## 2386      E. Atuesta  23           DMF   5500000
## 6962       R. McGree  21           CMF   3300000
## 7121     R. McCrorie  22           CMF   1200000
## 6671 P. Zinckernagel  25            RW   5000000
## 2062       D. Kutesa  22            LW   1600000
## 1550     C. Bassogog  24            ST   5000000

# Count players per nationality and keep the counts as a two-column data
# frame (Var1 = nationality, Freq = number of players).
country <- data.frame(table(samplefifa21[["nationality"]]))
head(country)
##                Var1 Freq
## 1           Albania    7
## 2           Algeria   15
## 3            Angola    4
## 4 Antigua & Barbuda    1
## 5         Argentina  276
## 6           Armenia    1

Finding the region-wise distribution of the better players

# World polygon outlines used both as the base map and as the lookup that
# matches each nationality to a region.
WorldData <- map_data("world")

# Choropleth of player counts: a white base layer of every region, then a
# second layer filled by the per-nationality frequency.
# NOTE(review): the fill layer is matched on Var1 against the map's region
# names; nationalities spelled differently from the map (for example
# "Antigua & Barbuda") may stay unfilled -- confirm and recode if needed.
player_map <- ggplot() +
  geom_map(
    data = WorldData,
    map = WorldData,
    mapping = aes(x = long, y = lat, group = group, map_id = region),
    fill = "white",
    colour = "#7f7f7f",
    size = 0.5
  ) +
  geom_map(
    data = country,
    map = WorldData,
    mapping = aes(fill = Freq, map_id = Var1),
    colour = "#7f7f7f",
    size = 0.5
  ) +
  coord_map("rectangular", lat0 = 0, xlim = c(-180, 180), ylim = c(-60, 90)) +
  scale_fill_continuous(low = "#E67E22", high = "#1ABC9C", guide = "colorbar") +
  labs(fill = "Frequency", title = "Distribution of Players across the globe") +
  theme_bw()

player_map